diff --git a/fearless_simd/src/generated/avx2.rs b/fearless_simd/src/generated/avx2.rs index f6169489..27e3b8df 100644 --- a/fearless_simd/src/generated/avx2.rs +++ b/fearless_simd/src/generated/avx2.rs @@ -877,8 +877,11 @@ impl Simd for Avx2 { __m128i::from(a).simd_into(self) } #[inline(always)] - fn splat_mask8x16(self, val: i8) -> mask8x16 { - unsafe { _mm_set1_epi8(val).simd_into(self) } + fn splat_mask8x16(self, val: bool) -> mask8x16 { + unsafe { + let val: i8 = if val { !0 } else { 0 }; + _mm_set1_epi8(val).simd_into(self) + } } #[inline(always)] fn load_array_mask8x16(self, val: [i8; 16usize]) -> mask8x16 { @@ -888,82 +891,10 @@ impl Simd for Avx2 { } } #[inline(always)] - fn load_array_ref_mask8x16(self, val: &[i8; 16usize]) -> mask8x16 { - mask8x16 { - val: unsafe { core::mem::transmute_copy(val) }, - simd: self, - } - } - #[inline(always)] fn as_array_mask8x16(self, a: mask8x16) -> [i8; 16usize] { unsafe { core::mem::transmute::<__m128i, [i8; 16usize]>(a.val.0) } } #[inline(always)] - fn as_array_ref_mask8x16(self, a: &mask8x16) -> &[i8; 16usize] { - unsafe { core::mem::transmute::<&__m128i, &[i8; 16usize]>(&a.val.0) } - } - #[inline(always)] - fn as_array_mut_mask8x16(self, a: &mut mask8x16) -> &mut [i8; 16usize] { - unsafe { core::mem::transmute::<&mut __m128i, &mut [i8; 16usize]>(&mut a.val.0) } - } - #[inline(always)] - fn store_array_mask8x16(self, a: mask8x16, dest: &mut [i8; 16usize]) -> () { - unsafe { - core::ptr::copy_nonoverlapping( - (&raw const a.val.0) as *const i8, - dest.as_mut_ptr(), - 16usize, - ); - } - } - #[inline(always)] - fn cvt_from_bytes_mask8x16(self, a: u8x16) -> mask8x16 { - unsafe { - mask8x16 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask8x16(self, a: mask8x16) -> u8x16 { - unsafe { - u8x16 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask8x16( - self, - a: mask8x16, - b: mask8x16, - ) -> mask8x16 { - unsafe { - if SHIFT >= 16usize { - return b; - } - let result = dyn_alignr_128( - self.cvt_to_bytes_mask8x16(b).val.0, - self.cvt_to_bytes_mask8x16(a).val.0, - SHIFT, - ); - self.cvt_from_bytes_mask8x16(u8x16 { - val: crate::support::Aligned128(result), - simd: self, - }) - } - } - #[inline(always)] - fn slide_within_blocks_mask8x16( - self, - a: mask8x16, - b: mask8x16, - ) -> mask8x16 { - self.slide_mask8x16::(a, b) - } - #[inline(always)] fn and_mask8x16(self, a: mask8x16, b: mask8x16) -> mask8x16 { unsafe { _mm_and_si128(a.into(), b.into()).simd_into(self) } } @@ -977,7 +908,7 @@ impl Simd for Avx2 { } #[inline(always)] fn not_mask8x16(self, a: mask8x16) -> mask8x16 { - a ^ !0 + self.xor_mask8x16(a, self.splat_mask8x16(true)) } #[inline(always)] fn select_mask8x16( @@ -1437,8 +1368,11 @@ impl Simd for Avx2 { __m128i::from(a).simd_into(self) } #[inline(always)] - fn splat_mask16x8(self, val: i16) -> mask16x8 { - unsafe { _mm_set1_epi16(val).simd_into(self) } + fn splat_mask16x8(self, val: bool) -> mask16x8 { + unsafe { + let val: i16 = if val { !0 } else { 0 }; + _mm_set1_epi16(val).simd_into(self) + } } #[inline(always)] fn load_array_mask16x8(self, val: [i16; 8usize]) -> mask16x8 { @@ -1448,82 +1382,10 @@ impl Simd for Avx2 { } } #[inline(always)] - fn load_array_ref_mask16x8(self, val: &[i16; 8usize]) -> mask16x8 { - mask16x8 { - val: unsafe { core::mem::transmute_copy(val) }, - simd: self, - } - } - #[inline(always)] fn as_array_mask16x8(self, a: mask16x8) -> [i16; 8usize] { unsafe { core::mem::transmute::<__m128i, [i16; 8usize]>(a.val.0) } } #[inline(always)] - fn as_array_ref_mask16x8(self, a: &mask16x8) -> &[i16; 8usize] { - unsafe { core::mem::transmute::<&__m128i, &[i16; 8usize]>(&a.val.0) } - } - #[inline(always)] - fn as_array_mut_mask16x8(self, a: &mut mask16x8) -> &mut [i16; 8usize] { - unsafe { core::mem::transmute::<&mut __m128i, &mut [i16; 8usize]>(&mut a.val.0) } - } - #[inline(always)] - fn store_array_mask16x8(self, a: mask16x8, dest: &mut [i16; 8usize]) -> () { - unsafe { - core::ptr::copy_nonoverlapping( - (&raw const a.val.0) as *const i16, - dest.as_mut_ptr(), - 8usize, - ); - } - } - #[inline(always)] - fn cvt_from_bytes_mask16x8(self, a: u8x16) -> mask16x8 { - unsafe { - mask16x8 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask16x8(self, a: mask16x8) -> u8x16 { - unsafe { - u8x16 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask16x8( - self, - a: mask16x8, - b: mask16x8, - ) -> mask16x8 { - unsafe { - if SHIFT >= 8usize { - return b; - } - let result = dyn_alignr_128( - self.cvt_to_bytes_mask16x8(b).val.0, - self.cvt_to_bytes_mask16x8(a).val.0, - SHIFT * 2usize, - ); - self.cvt_from_bytes_mask16x8(u8x16 { - val: crate::support::Aligned128(result), - simd: self, - }) - } - } - #[inline(always)] - fn slide_within_blocks_mask16x8( - self, - a: mask16x8, - b: mask16x8, - ) -> mask16x8 { - self.slide_mask16x8::(a, b) - } - #[inline(always)] fn and_mask16x8(self, a: mask16x8, b: mask16x8) -> mask16x8 { unsafe { _mm_and_si128(a.into(), b.into()).simd_into(self) } } @@ -1537,7 +1399,7 @@ impl Simd for Avx2 { } #[inline(always)] fn not_mask16x8(self, a: mask16x8) -> mask16x8 { - a ^ !0 + self.xor_mask16x8(a, self.splat_mask16x8(true)) } #[inline(always)] fn select_mask16x8( @@ -2007,8 +1869,11 @@ impl Simd for Avx2 { } } #[inline(always)] - fn splat_mask32x4(self, val: i32) -> mask32x4 { - unsafe { _mm_set1_epi32(val).simd_into(self) } + fn splat_mask32x4(self, val: bool) -> mask32x4 { + unsafe { + let val: i32 = if val { !0 } else { 0 }; + _mm_set1_epi32(val).simd_into(self) + } } #[inline(always)] fn load_array_mask32x4(self, val: [i32; 4usize]) -> mask32x4 { @@ -2018,82 +1883,10 @@ impl Simd for Avx2 { } } #[inline(always)] - fn load_array_ref_mask32x4(self, val: &[i32; 4usize]) -> mask32x4 { - mask32x4 { - val: unsafe { core::mem::transmute_copy(val) }, - simd: self, - } - } - #[inline(always)] fn as_array_mask32x4(self, a: mask32x4) -> [i32; 4usize] { unsafe { core::mem::transmute::<__m128i, [i32; 4usize]>(a.val.0) } } #[inline(always)] - fn as_array_ref_mask32x4(self, a: &mask32x4) -> &[i32; 4usize] { - unsafe { core::mem::transmute::<&__m128i, &[i32; 4usize]>(&a.val.0) } - } - #[inline(always)] - fn as_array_mut_mask32x4(self, a: &mut mask32x4) -> &mut [i32; 4usize] { - unsafe { core::mem::transmute::<&mut __m128i, &mut [i32; 4usize]>(&mut a.val.0) } - } - #[inline(always)] - fn store_array_mask32x4(self, a: mask32x4, dest: &mut [i32; 4usize]) -> () { - unsafe { - core::ptr::copy_nonoverlapping( - (&raw const a.val.0) as *const i32, - dest.as_mut_ptr(), - 4usize, - ); - } - } - #[inline(always)] - fn cvt_from_bytes_mask32x4(self, a: u8x16) -> mask32x4 { - unsafe { - mask32x4 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask32x4(self, a: mask32x4) -> u8x16 { - unsafe { - u8x16 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask32x4( - self, - a: mask32x4, - b: mask32x4, - ) -> mask32x4 { - unsafe { - if SHIFT >= 4usize { - return b; - } - let result = dyn_alignr_128( - self.cvt_to_bytes_mask32x4(b).val.0, - self.cvt_to_bytes_mask32x4(a).val.0, - SHIFT * 4usize, - ); - self.cvt_from_bytes_mask32x4(u8x16 { - val: crate::support::Aligned128(result), - simd: self, - }) - } - } - #[inline(always)] - fn slide_within_blocks_mask32x4( - self, - a: mask32x4, - b: mask32x4, - ) -> mask32x4 { - self.slide_mask32x4::(a, b) - } - #[inline(always)] fn and_mask32x4(self, a: mask32x4, b: mask32x4) -> mask32x4 { unsafe { _mm_and_si128(a.into(), b.into()).simd_into(self) } } @@ -2107,7 +1900,7 @@ impl Simd for Avx2 { } #[inline(always)] fn not_mask32x4(self, a: mask32x4) -> mask32x4 { - a ^ !0 + self.xor_mask32x4(a, self.splat_mask32x4(true)) } #[inline(always)] fn select_mask32x4( @@ -2378,8 +2171,11 @@ impl Simd for Avx2 { unsafe { _mm_castpd_ps(a.into()).simd_into(self) } } #[inline(always)] - fn splat_mask64x2(self, val: i64) -> mask64x2 { - unsafe { _mm_set1_epi64x(val).simd_into(self) } + fn splat_mask64x2(self, val: bool) -> mask64x2 { + unsafe { + let val: i64 = if val { !0 } else { 0 }; + _mm_set1_epi64x(val).simd_into(self) + } } #[inline(always)] fn load_array_mask64x2(self, val: [i64; 2usize]) -> mask64x2 { @@ -2389,82 +2185,10 @@ impl Simd for Avx2 { } } #[inline(always)] - fn load_array_ref_mask64x2(self, val: &[i64; 2usize]) -> mask64x2 { - mask64x2 { - val: unsafe { core::mem::transmute_copy(val) }, - simd: self, - } - } - #[inline(always)] fn as_array_mask64x2(self, a: mask64x2) -> [i64; 2usize] { unsafe { core::mem::transmute::<__m128i, [i64; 2usize]>(a.val.0) } } #[inline(always)] - fn as_array_ref_mask64x2(self, a: &mask64x2) -> &[i64; 2usize] { - unsafe { core::mem::transmute::<&__m128i, &[i64; 2usize]>(&a.val.0) } - } - #[inline(always)] - fn as_array_mut_mask64x2(self, a: &mut mask64x2) -> &mut [i64; 2usize] { - unsafe { core::mem::transmute::<&mut __m128i, &mut [i64; 2usize]>(&mut a.val.0) } - } - #[inline(always)] - fn store_array_mask64x2(self, a: mask64x2, dest: &mut [i64; 2usize]) -> () { - unsafe { - core::ptr::copy_nonoverlapping( - (&raw const a.val.0) as *const i64, - dest.as_mut_ptr(), - 2usize, - ); - } - } - #[inline(always)] - fn cvt_from_bytes_mask64x2(self, a: u8x16) -> mask64x2 { - unsafe { - mask64x2 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask64x2(self, a: mask64x2) -> u8x16 { - unsafe { - u8x16 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask64x2( - self, - a: mask64x2, - b: mask64x2, - ) -> mask64x2 { - unsafe { - if SHIFT >= 2usize { - return b; - } - let result = dyn_alignr_128( - self.cvt_to_bytes_mask64x2(b).val.0, - self.cvt_to_bytes_mask64x2(a).val.0, - SHIFT * 8usize, - ); - self.cvt_from_bytes_mask64x2(u8x16 { - val: crate::support::Aligned128(result), - simd: self, - }) - } - } - #[inline(always)] - fn slide_within_blocks_mask64x2( - self, - a: mask64x2, - b: mask64x2, - ) -> mask64x2 { - self.slide_mask64x2::(a, b) - } - #[inline(always)] fn and_mask64x2(self, a: mask64x2, b: mask64x2) -> mask64x2 { unsafe { _mm_and_si128(a.into(), b.into()).simd_into(self) } } @@ -2478,7 +2202,7 @@ impl Simd for Avx2 { } #[inline(always)] fn not_mask64x2(self, a: mask64x2) -> mask64x2 { - a ^ !0 + self.xor_mask64x2(a, self.splat_mask64x2(true)) } #[inline(always)] fn select_mask64x2( @@ -3528,8 +3252,11 @@ impl Simd for Avx2 { __m256i::from(a).simd_into(self) } #[inline(always)] - fn splat_mask8x32(self, val: i8) -> mask8x32 { - unsafe { _mm256_set1_epi8(val).simd_into(self) } + fn splat_mask8x32(self, val: bool) -> mask8x32 { + unsafe { + let val: i8 = if val { !0 } else { 0 }; + _mm256_set1_epi8(val).simd_into(self) + } } #[inline(always)] fn load_array_mask8x32(self, val: [i8; 32usize]) -> mask8x32 { @@ -3539,95 +3266,10 @@ impl Simd for Avx2 { } } #[inline(always)] - fn load_array_ref_mask8x32(self, val: &[i8; 32usize]) -> mask8x32 { - mask8x32 { - val: unsafe { core::mem::transmute_copy(val) }, - simd: self, - } - } - #[inline(always)] fn as_array_mask8x32(self, a: mask8x32) -> [i8; 32usize] { unsafe { core::mem::transmute::<__m256i, [i8; 32usize]>(a.val.0) } } #[inline(always)] - fn as_array_ref_mask8x32(self, a: &mask8x32) -> &[i8; 32usize] { - unsafe { core::mem::transmute::<&__m256i, &[i8; 32usize]>(&a.val.0) } - } - #[inline(always)] - fn as_array_mut_mask8x32(self, a: &mut mask8x32) -> &mut [i8; 32usize] { - unsafe { core::mem::transmute::<&mut __m256i, &mut [i8; 32usize]>(&mut a.val.0) } - } - #[inline(always)] - fn store_array_mask8x32(self, a: mask8x32, dest: &mut [i8; 32usize]) -> () { - unsafe { - core::ptr::copy_nonoverlapping( - (&raw const a.val.0) as *const i8, - dest.as_mut_ptr(), - 32usize, - ); - } - } - #[inline(always)] - fn cvt_from_bytes_mask8x32(self, a: u8x32) -> mask8x32 { - unsafe { - mask8x32 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask8x32(self, a: mask8x32) -> u8x32 { - unsafe { - u8x32 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask8x32( - self, - a: mask8x32, - b: mask8x32, - ) -> mask8x32 { - unsafe { - if SHIFT >= 32usize { - return b; - } - let result = cross_block_alignr_256x1( - self.cvt_to_bytes_mask8x32(b).val.0, - self.cvt_to_bytes_mask8x32(a).val.0, - SHIFT, - ); - self.cvt_from_bytes_mask8x32(u8x32 { - val: crate::support::Aligned256(result), - simd: self, - }) - } - } - #[inline(always)] - fn slide_within_blocks_mask8x32( - self, - a: mask8x32, - b: mask8x32, - ) -> mask8x32 { - unsafe { - if SHIFT >= 16usize { - return b; - } - let result = dyn_alignr_256( - self.cvt_to_bytes_mask8x32(b).val.0, - self.cvt_to_bytes_mask8x32(a).val.0, - SHIFT, - ); - self.cvt_from_bytes_mask8x32(u8x32 { - val: crate::support::Aligned256(result), - simd: self, - }) - } - } - #[inline(always)] fn and_mask8x32(self, a: mask8x32, b: mask8x32) -> mask8x32 { unsafe { _mm256_and_si256(a.into(), b.into()).simd_into(self) } } @@ -3641,7 +3283,7 @@ impl Simd for Avx2 { } #[inline(always)] fn not_mask8x32(self, a: mask8x32) -> mask8x32 { - a ^ !0 + self.xor_mask8x32(a, self.splat_mask8x32(true)) } #[inline(always)] fn select_mask8x32( @@ -4303,8 +3945,11 @@ impl Simd for Avx2 { __m256i::from(a).simd_into(self) } #[inline(always)] - fn splat_mask16x16(self, val: i16) -> mask16x16 { - unsafe { _mm256_set1_epi16(val).simd_into(self) } + fn splat_mask16x16(self, val: bool) -> mask16x16 { + unsafe { + let val: i16 = if val { !0 } else { 0 }; + _mm256_set1_epi16(val).simd_into(self) + } } #[inline(always)] fn load_array_mask16x16(self, val: [i16; 16usize]) -> mask16x16 { @@ -4314,95 +3959,10 @@ impl Simd for Avx2 { } } #[inline(always)] - fn load_array_ref_mask16x16(self, val: &[i16; 16usize]) -> mask16x16 { - mask16x16 { - val: unsafe { core::mem::transmute_copy(val) }, - simd: self, - } - } - #[inline(always)] fn as_array_mask16x16(self, a: mask16x16) -> [i16; 16usize] { unsafe { core::mem::transmute::<__m256i, [i16; 16usize]>(a.val.0) } } #[inline(always)] - fn as_array_ref_mask16x16(self, a: &mask16x16) -> &[i16; 16usize] { - unsafe { core::mem::transmute::<&__m256i, &[i16; 16usize]>(&a.val.0) } - } - #[inline(always)] - fn as_array_mut_mask16x16(self, a: &mut mask16x16) -> &mut [i16; 16usize] { - unsafe { core::mem::transmute::<&mut __m256i, &mut [i16; 16usize]>(&mut a.val.0) } - } - #[inline(always)] - fn store_array_mask16x16(self, a: mask16x16, dest: &mut [i16; 16usize]) -> () { - unsafe { - core::ptr::copy_nonoverlapping( - (&raw const a.val.0) as *const i16, - dest.as_mut_ptr(), - 16usize, - ); - } - } - #[inline(always)] - fn cvt_from_bytes_mask16x16(self, a: u8x32) -> mask16x16 { - unsafe { - mask16x16 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask16x16(self, a: mask16x16) -> u8x32 { - unsafe { - u8x32 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask16x16( - self, - a: mask16x16, - b: mask16x16, - ) -> mask16x16 { - unsafe { - if SHIFT >= 16usize { - return b; - } - let result = cross_block_alignr_256x1( - self.cvt_to_bytes_mask16x16(b).val.0, - self.cvt_to_bytes_mask16x16(a).val.0, - SHIFT * 2usize, - ); - self.cvt_from_bytes_mask16x16(u8x32 { - val: crate::support::Aligned256(result), - simd: self, - }) - } - } - #[inline(always)] - fn slide_within_blocks_mask16x16( - self, - a: mask16x16, - b: mask16x16, - ) -> mask16x16 { - unsafe { - if SHIFT >= 8usize { - return b; - } - let result = dyn_alignr_256( - self.cvt_to_bytes_mask16x16(b).val.0, - self.cvt_to_bytes_mask16x16(a).val.0, - SHIFT * 2usize, - ); - self.cvt_from_bytes_mask16x16(u8x32 { - val: crate::support::Aligned256(result), - simd: self, - }) - } - } - #[inline(always)] fn and_mask16x16(self, a: mask16x16, b: mask16x16) -> mask16x16 { unsafe { _mm256_and_si256(a.into(), b.into()).simd_into(self) } } @@ -4416,7 +3976,7 @@ impl Simd for Avx2 { } #[inline(always)] fn not_mask16x16(self, a: mask16x16) -> mask16x16 { - a ^ !0 + self.xor_mask16x16(a, self.splat_mask16x16(true)) } #[inline(always)] fn select_mask16x16( @@ -5023,8 +4583,11 @@ impl Simd for Avx2 { } } #[inline(always)] - fn splat_mask32x8(self, val: i32) -> mask32x8 { - unsafe { _mm256_set1_epi32(val).simd_into(self) } + fn splat_mask32x8(self, val: bool) -> mask32x8 { + unsafe { + let val: i32 = if val { !0 } else { 0 }; + _mm256_set1_epi32(val).simd_into(self) + } } #[inline(always)] fn load_array_mask32x8(self, val: [i32; 8usize]) -> mask32x8 { @@ -5034,95 +4597,10 @@ impl Simd for Avx2 { } } #[inline(always)] - fn load_array_ref_mask32x8(self, val: &[i32; 8usize]) -> mask32x8 { - mask32x8 { - val: unsafe { core::mem::transmute_copy(val) }, - simd: self, - } - } - #[inline(always)] fn as_array_mask32x8(self, a: mask32x8) -> [i32; 8usize] { unsafe { core::mem::transmute::<__m256i, [i32; 8usize]>(a.val.0) } } #[inline(always)] - fn as_array_ref_mask32x8(self, a: &mask32x8) -> &[i32; 8usize] { - unsafe { core::mem::transmute::<&__m256i, &[i32; 8usize]>(&a.val.0) } - } - #[inline(always)] - fn as_array_mut_mask32x8(self, a: &mut mask32x8) -> &mut [i32; 8usize] { - unsafe { core::mem::transmute::<&mut __m256i, &mut [i32; 8usize]>(&mut a.val.0) } - } - #[inline(always)] - fn store_array_mask32x8(self, a: mask32x8, dest: &mut [i32; 8usize]) -> () { - unsafe { - core::ptr::copy_nonoverlapping( - (&raw const a.val.0) as *const i32, - dest.as_mut_ptr(), - 8usize, - ); - } - } - #[inline(always)] - fn cvt_from_bytes_mask32x8(self, a: u8x32) -> mask32x8 { - unsafe { - mask32x8 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask32x8(self, a: mask32x8) -> u8x32 { - unsafe { - u8x32 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask32x8( - self, - a: mask32x8, - b: mask32x8, - ) -> mask32x8 { - unsafe { - if SHIFT >= 8usize { - return b; - } - let result = cross_block_alignr_256x1( - self.cvt_to_bytes_mask32x8(b).val.0, - self.cvt_to_bytes_mask32x8(a).val.0, - SHIFT * 4usize, - ); - self.cvt_from_bytes_mask32x8(u8x32 { - val: crate::support::Aligned256(result), - simd: self, - }) - } - } - #[inline(always)] - fn slide_within_blocks_mask32x8( - self, - a: mask32x8, - b: mask32x8, - ) -> mask32x8 { - unsafe { - if SHIFT >= 4usize { - return b; - } - let result = dyn_alignr_256( - self.cvt_to_bytes_mask32x8(b).val.0, - self.cvt_to_bytes_mask32x8(a).val.0, - SHIFT * 4usize, - ); - self.cvt_from_bytes_mask32x8(u8x32 { - val: crate::support::Aligned256(result), - simd: self, - }) - } - } - #[inline(always)] fn and_mask32x8(self, a: mask32x8, b: mask32x8) -> mask32x8 { unsafe { _mm256_and_si256(a.into(), b.into()).simd_into(self) } } @@ -5136,7 +4614,7 @@ impl Simd for Avx2 { } #[inline(always)] fn not_mask32x8(self, a: mask32x8) -> mask32x8 { - a ^ !0 + self.xor_mask32x8(a, self.splat_mask32x8(true)) } #[inline(always)] fn select_mask32x8( @@ -5482,106 +4960,24 @@ impl Simd for Avx2 { unsafe { _mm256_castpd_ps(a.into()).simd_into(self) } } #[inline(always)] - fn splat_mask64x4(self, val: i64) -> mask64x4 { - unsafe { _mm256_set1_epi64x(val).simd_into(self) } - } - #[inline(always)] - fn load_array_mask64x4(self, val: [i64; 4usize]) -> mask64x4 { - mask64x4 { - val: unsafe { core::mem::transmute_copy(&val) }, - simd: self, - } - } - #[inline(always)] - fn load_array_ref_mask64x4(self, val: &[i64; 4usize]) -> mask64x4 { - mask64x4 { - val: unsafe { core::mem::transmute_copy(val) }, - simd: self, - } - } - #[inline(always)] - fn as_array_mask64x4(self, a: mask64x4) -> [i64; 4usize] { - unsafe { core::mem::transmute::<__m256i, [i64; 4usize]>(a.val.0) } - } - #[inline(always)] - fn as_array_ref_mask64x4(self, a: &mask64x4) -> &[i64; 4usize] { - unsafe { core::mem::transmute::<&__m256i, &[i64; 4usize]>(&a.val.0) } - } - #[inline(always)] - fn as_array_mut_mask64x4(self, a: &mut mask64x4) -> &mut [i64; 4usize] { - unsafe { core::mem::transmute::<&mut __m256i, &mut [i64; 4usize]>(&mut a.val.0) } - } - #[inline(always)] - fn store_array_mask64x4(self, a: mask64x4, dest: &mut [i64; 4usize]) -> () { - unsafe { - core::ptr::copy_nonoverlapping( - (&raw const a.val.0) as *const i64, - dest.as_mut_ptr(), - 4usize, - ); - } - } - #[inline(always)] - fn cvt_from_bytes_mask64x4(self, a: u8x32) -> mask64x4 { + fn splat_mask64x4(self, val: bool) -> mask64x4 { unsafe { - mask64x4 { - val: core::mem::transmute(a.val), - simd: self, - } + let val: i64 = if val { !0 } else { 0 }; + _mm256_set1_epi64x(val).simd_into(self) } } #[inline(always)] - fn cvt_to_bytes_mask64x4(self, a: mask64x4) -> u8x32 { - unsafe { - u8x32 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask64x4( - self, - a: mask64x4, - b: mask64x4, - ) -> mask64x4 { - unsafe { - if SHIFT >= 4usize { - return b; - } - let result = cross_block_alignr_256x1( - self.cvt_to_bytes_mask64x4(b).val.0, - self.cvt_to_bytes_mask64x4(a).val.0, - SHIFT * 8usize, - ); - self.cvt_from_bytes_mask64x4(u8x32 { - val: crate::support::Aligned256(result), - simd: self, - }) - } - } - #[inline(always)] - fn slide_within_blocks_mask64x4( - self, - a: mask64x4, - b: mask64x4, - ) -> mask64x4 { - unsafe { - if SHIFT >= 2usize { - return b; - } - let result = dyn_alignr_256( - self.cvt_to_bytes_mask64x4(b).val.0, - self.cvt_to_bytes_mask64x4(a).val.0, - SHIFT * 8usize, - ); - self.cvt_from_bytes_mask64x4(u8x32 { - val: crate::support::Aligned256(result), - simd: self, - }) + fn load_array_mask64x4(self, val: [i64; 4usize]) -> mask64x4 { + mask64x4 { + val: unsafe { core::mem::transmute_copy(&val) }, + simd: self, } } #[inline(always)] + fn as_array_mask64x4(self, a: mask64x4) -> [i64; 4usize] { + unsafe { core::mem::transmute::<__m256i, [i64; 4usize]>(a.val.0) } + } + #[inline(always)] fn and_mask64x4(self, a: mask64x4, b: mask64x4) -> mask64x4 { unsafe { _mm256_and_si256(a.into(), b.into()).simd_into(self) } } @@ -5595,7 +4991,7 @@ impl Simd for Avx2 { } #[inline(always)] fn not_mask64x4(self, a: mask64x4) -> mask64x4 { - a ^ !0 + self.xor_mask64x4(a, self.splat_mask64x4(true)) } #[inline(always)] fn select_mask64x4( @@ -6664,7 +6060,7 @@ impl Simd for Avx2 { ) } #[inline(always)] - fn splat_mask8x64(self, val: i8) -> mask8x64 { + fn splat_mask8x64(self, val: bool) -> mask8x64 { let half = self.splat_mask8x32(val); self.combine_mask8x32(half, half) } @@ -6676,87 +6072,10 @@ impl Simd for Avx2 { } } #[inline(always)] - fn load_array_ref_mask8x64(self, val: &[i8; 64usize]) -> mask8x64 { - mask8x64 { - val: unsafe { core::mem::transmute_copy(val) }, - simd: self, - } - } - #[inline(always)] fn as_array_mask8x64(self, a: mask8x64) -> [i8; 64usize] { unsafe { core::mem::transmute::<[__m256i; 2usize], [i8; 64usize]>(a.val.0) } } #[inline(always)] - fn as_array_ref_mask8x64(self, a: &mask8x64) -> &[i8; 64usize] { - unsafe { core::mem::transmute::<&[__m256i; 2usize], &[i8; 64usize]>(&a.val.0) } - } - #[inline(always)] - fn as_array_mut_mask8x64(self, a: &mut mask8x64) -> &mut [i8; 64usize] { - unsafe { core::mem::transmute::<&mut [__m256i; 2usize], &mut [i8; 64usize]>(&mut a.val.0) } - } - #[inline(always)] - fn store_array_mask8x64(self, a: mask8x64, dest: &mut [i8; 64usize]) -> () { - unsafe { - core::ptr::copy_nonoverlapping( - (&raw const a.val.0) as *const i8, - dest.as_mut_ptr(), - 64usize, - ); - } - } - #[inline(always)] - fn cvt_from_bytes_mask8x64(self, a: u8x64) -> mask8x64 { - unsafe { - mask8x64 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask8x64(self, a: mask8x64) -> u8x64 { - unsafe { - u8x64 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask8x64( - self, - a: mask8x64, - b: mask8x64, - ) -> mask8x64 { - unsafe { - if SHIFT >= 64usize { - return b; - } - let result = cross_block_alignr_256x2( - self.cvt_to_bytes_mask8x64(b).val.0, - self.cvt_to_bytes_mask8x64(a).val.0, - SHIFT, - ); - self.cvt_from_bytes_mask8x64(u8x64 { - val: crate::support::Aligned512(result), - simd: self, - }) - } - } - #[inline(always)] - fn slide_within_blocks_mask8x64( - self, - a: mask8x64, - b: mask8x64, - ) -> mask8x64 { - let (a0, a1) = self.split_mask8x64(a); - let (b0, b1) = self.split_mask8x64(b); - self.combine_mask8x32( - self.slide_within_blocks_mask8x32::(a0, b0), - self.slide_within_blocks_mask8x32::(a1, b1), - ) - } - #[inline(always)] fn and_mask8x64(self, a: mask8x64, b: mask8x64) -> mask8x64 { let (a0, a1) = self.split_mask8x64(a); let (b0, b1) = self.split_mask8x64(b); @@ -7479,7 +6798,7 @@ impl Simd for Avx2 { ) } #[inline(always)] - fn splat_mask16x32(self, val: i16) -> mask16x32 { + fn splat_mask16x32(self, val: bool) -> mask16x32 { let half = self.splat_mask16x16(val); self.combine_mask16x16(half, half) } @@ -7491,87 +6810,10 @@ impl Simd for Avx2 { } } #[inline(always)] - fn load_array_ref_mask16x32(self, val: &[i16; 32usize]) -> mask16x32 { - mask16x32 { - val: unsafe { core::mem::transmute_copy(val) }, - simd: self, - } - } - #[inline(always)] fn as_array_mask16x32(self, a: mask16x32) -> [i16; 32usize] { unsafe { core::mem::transmute::<[__m256i; 2usize], [i16; 32usize]>(a.val.0) } } #[inline(always)] - fn as_array_ref_mask16x32(self, a: &mask16x32) -> &[i16; 32usize] { - unsafe { core::mem::transmute::<&[__m256i; 2usize], &[i16; 32usize]>(&a.val.0) } - } - #[inline(always)] - fn as_array_mut_mask16x32(self, a: &mut mask16x32) -> &mut [i16; 32usize] { - unsafe { core::mem::transmute::<&mut [__m256i; 2usize], &mut [i16; 32usize]>(&mut a.val.0) } - } - #[inline(always)] - fn store_array_mask16x32(self, a: mask16x32, dest: &mut [i16; 32usize]) -> () { - unsafe { - core::ptr::copy_nonoverlapping( - (&raw const a.val.0) as *const i16, - dest.as_mut_ptr(), - 32usize, - ); - } - } - #[inline(always)] - fn cvt_from_bytes_mask16x32(self, a: u8x64) -> mask16x32 { - unsafe { - mask16x32 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask16x32(self, a: mask16x32) -> u8x64 { - unsafe { - u8x64 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask16x32( - self, - a: mask16x32, - b: mask16x32, - ) -> mask16x32 { - unsafe { - if SHIFT >= 32usize { - return b; - } - let result = cross_block_alignr_256x2( - self.cvt_to_bytes_mask16x32(b).val.0, - self.cvt_to_bytes_mask16x32(a).val.0, - SHIFT * 2usize, - ); - self.cvt_from_bytes_mask16x32(u8x64 { - val: crate::support::Aligned512(result), - simd: self, - }) - } - } - #[inline(always)] - fn slide_within_blocks_mask16x32( - self, - a: mask16x32, - b: mask16x32, - ) -> mask16x32 { - let (a0, a1) = self.split_mask16x32(a); - let (b0, b1) = self.split_mask16x32(b); - self.combine_mask16x16( - self.slide_within_blocks_mask16x16::(a0, b0), - self.slide_within_blocks_mask16x16::(a1, b1), - ) - } - #[inline(always)] fn and_mask16x32(self, a: mask16x32, b: mask16x32) -> mask16x32 { let (a0, a1) = self.split_mask16x32(a); let (b0, b1) = self.split_mask16x32(b); @@ -8258,7 +7500,7 @@ impl Simd for Avx2 { self.combine_f32x8(self.cvt_f32_u32x8(a0), self.cvt_f32_u32x8(a1)) } #[inline(always)] - fn splat_mask32x16(self, val: i32) -> mask32x16 { + fn splat_mask32x16(self, val: bool) -> mask32x16 { let half = self.splat_mask32x8(val); self.combine_mask32x8(half, half) } @@ -8270,87 +7512,10 @@ impl Simd for Avx2 { } } #[inline(always)] - fn load_array_ref_mask32x16(self, val: &[i32; 16usize]) -> mask32x16 { - mask32x16 { - val: unsafe { core::mem::transmute_copy(val) }, - simd: self, - } - } - #[inline(always)] fn as_array_mask32x16(self, a: mask32x16) -> [i32; 16usize] { unsafe { core::mem::transmute::<[__m256i; 2usize], [i32; 16usize]>(a.val.0) } } #[inline(always)] - fn as_array_ref_mask32x16(self, a: &mask32x16) -> &[i32; 16usize] { - unsafe { core::mem::transmute::<&[__m256i; 2usize], &[i32; 16usize]>(&a.val.0) } - } - #[inline(always)] - fn as_array_mut_mask32x16(self, a: &mut mask32x16) -> &mut [i32; 16usize] { - unsafe { core::mem::transmute::<&mut [__m256i; 2usize], &mut [i32; 16usize]>(&mut a.val.0) } - } - #[inline(always)] - fn store_array_mask32x16(self, a: mask32x16, dest: &mut [i32; 16usize]) -> () { - unsafe { - core::ptr::copy_nonoverlapping( - (&raw const a.val.0) as *const i32, - dest.as_mut_ptr(), - 16usize, - ); - } - } - #[inline(always)] - fn cvt_from_bytes_mask32x16(self, a: u8x64) -> mask32x16 { - unsafe { - mask32x16 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask32x16(self, a: mask32x16) -> u8x64 { - unsafe { - u8x64 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask32x16( - self, - a: mask32x16, - b: mask32x16, - ) -> mask32x16 { - unsafe { - if SHIFT >= 16usize { - return b; - } - let result = cross_block_alignr_256x2( - self.cvt_to_bytes_mask32x16(b).val.0, - self.cvt_to_bytes_mask32x16(a).val.0, - SHIFT * 4usize, - ); - self.cvt_from_bytes_mask32x16(u8x64 { - val: crate::support::Aligned512(result), - simd: self, - }) - } - } - #[inline(always)] - fn slide_within_blocks_mask32x16( - self, - a: mask32x16, - b: mask32x16, - ) -> mask32x16 { - let (a0, a1) = self.split_mask32x16(a); - let (b0, b1) = self.split_mask32x16(b); - self.combine_mask32x8( - self.slide_within_blocks_mask32x8::(a0, b0), - self.slide_within_blocks_mask32x8::(a1, b1), - ) - } - #[inline(always)] fn and_mask32x16(self, a: mask32x16, b: mask32x16) -> mask32x16 { let (a0, a1) = self.split_mask32x16(a); let (b0, b1) = self.split_mask32x16(b); @@ -8748,7 +7913,7 @@ impl Simd for Avx2 { ) } #[inline(always)] - fn splat_mask64x8(self, val: i64) -> mask64x8 { + fn splat_mask64x8(self, val: bool) -> mask64x8 { let half = self.splat_mask64x4(val); self.combine_mask64x4(half, half) } @@ -8760,87 +7925,10 @@ impl Simd for Avx2 { } } #[inline(always)] - fn load_array_ref_mask64x8(self, val: &[i64; 8usize]) -> mask64x8 { - mask64x8 { - val: unsafe { core::mem::transmute_copy(val) }, - simd: self, - } - } - #[inline(always)] fn as_array_mask64x8(self, a: mask64x8) -> [i64; 8usize] { unsafe { core::mem::transmute::<[__m256i; 2usize], [i64; 8usize]>(a.val.0) } } #[inline(always)] - fn as_array_ref_mask64x8(self, a: &mask64x8) -> &[i64; 8usize] { - unsafe { core::mem::transmute::<&[__m256i; 2usize], &[i64; 8usize]>(&a.val.0) } - } - #[inline(always)] - fn as_array_mut_mask64x8(self, a: &mut mask64x8) -> &mut [i64; 8usize] { - unsafe { core::mem::transmute::<&mut [__m256i; 2usize], &mut [i64; 8usize]>(&mut a.val.0) } - } - #[inline(always)] - fn store_array_mask64x8(self, a: mask64x8, dest: &mut [i64; 8usize]) -> () { - unsafe { - core::ptr::copy_nonoverlapping( - (&raw const a.val.0) as *const i64, - dest.as_mut_ptr(), - 8usize, - ); - } - } - #[inline(always)] - fn cvt_from_bytes_mask64x8(self, a: u8x64) -> mask64x8 { - unsafe { - mask64x8 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask64x8(self, a: mask64x8) -> u8x64 { - unsafe { - u8x64 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask64x8( - self, - a: mask64x8, - b: mask64x8, - ) -> mask64x8 { - unsafe { - if SHIFT >= 8usize { - return b; - } - let result = cross_block_alignr_256x2( - self.cvt_to_bytes_mask64x8(b).val.0, - self.cvt_to_bytes_mask64x8(a).val.0, - SHIFT * 8usize, - ); - self.cvt_from_bytes_mask64x8(u8x64 { - val: crate::support::Aligned512(result), - simd: self, - }) - } - } - #[inline(always)] - fn slide_within_blocks_mask64x8( - self, - a: mask64x8, - b: mask64x8, - ) -> mask64x8 { - let (a0, a1) = self.split_mask64x8(a); - let (b0, b1) = self.split_mask64x8(b); - self.combine_mask64x4( - self.slide_within_blocks_mask64x4::(a0, b0), - self.slide_within_blocks_mask64x4::(a1, b1), - ) - } - #[inline(always)] fn and_mask64x8(self, a: mask64x8, b: mask64x8) -> mask64x8 { let (a0, a1) = self.split_mask64x8(a); let (b0, b1) = self.split_mask64x8(b); diff --git a/fearless_simd/src/generated/fallback.rs b/fearless_simd/src/generated/fallback.rs index 2449b569..43e8fd1b 100644 --- a/fearless_simd/src/generated/fallback.rs +++ b/fearless_simd/src/generated/fallback.rs @@ -473,10 +473,26 @@ impl Simd for Fallback { #[inline(always)] fn select_f32x4(self, a: mask32x4, b: f32x4, c: f32x4) -> f32x4 { [ - if a[0usize] != 0 { b[0usize] } else { c[0usize] }, - if a[1usize] != 0 { b[1usize] } else { c[1usize] }, - if a[2usize] != 0 { b[2usize] } else { c[2usize] }, - if a[3usize] != 0 { b[3usize] } else { c[3usize] }, + if a.val.0[0usize] != 0 { + b[0usize] + } else { + c[0usize] + }, + if a.val.0[1usize] != 0 { + b[1usize] + } else { + c[1usize] + }, + if a.val.0[2usize] != 0 { + b[2usize] + } else { + c[2usize] + }, + if a.val.0[3usize] != 0 { + b[3usize] + } else { + c[3usize] + }, ] .simd_into(self) } @@ -996,42 +1012,82 @@ impl Simd for Fallback { #[inline(always)] fn select_i8x16(self, a: mask8x16, b: i8x16, c: i8x16) -> i8x16 { [ - if a[0usize] != 0 { b[0usize] } else { c[0usize] }, - if a[1usize] != 0 { b[1usize] } else { c[1usize] }, - if a[2usize] != 0 { b[2usize] } else { c[2usize] }, - if a[3usize] != 0 { b[3usize] } else { c[3usize] }, - if a[4usize] != 0 { b[4usize] } else { c[4usize] }, - if a[5usize] != 0 { b[5usize] } else { c[5usize] }, - if a[6usize] != 0 { b[6usize] } else { c[6usize] }, - if a[7usize] != 0 { b[7usize] } else { c[7usize] }, - if a[8usize] != 0 { b[8usize] } else { c[8usize] }, - if a[9usize] != 0 { b[9usize] } else { c[9usize] }, - if a[10usize] != 0 { + if a.val.0[0usize] != 0 { + b[0usize] + } else { + c[0usize] + }, + if a.val.0[1usize] != 0 { + b[1usize] + } else { + c[1usize] + }, + if a.val.0[2usize] != 0 { + b[2usize] + } else { + c[2usize] + }, + if a.val.0[3usize] != 0 { + b[3usize] + } else { + c[3usize] + }, + if a.val.0[4usize] != 0 { + b[4usize] + } else { + c[4usize] + }, + if a.val.0[5usize] != 0 { + b[5usize] + } else { + c[5usize] + }, + if a.val.0[6usize] != 0 { + b[6usize] + } else { + c[6usize] + }, + if a.val.0[7usize] != 0 { + b[7usize] + } else { + c[7usize] + }, + if a.val.0[8usize] != 0 { + b[8usize] + } else { + c[8usize] + }, + if a.val.0[9usize] != 0 { + b[9usize] + } else { + c[9usize] + }, + if a.val.0[10usize] != 0 { b[10usize] } else { c[10usize] }, - if a[11usize] != 0 { + if a.val.0[11usize] != 0 { b[11usize] } else { c[11usize] }, - if a[12usize] != 0 { + if a.val.0[12usize] != 0 { b[12usize] } else { c[12usize] }, - if a[13usize] != 0 { + if a.val.0[13usize] != 0 { b[13usize] } else { c[13usize] }, - if a[14usize] != 0 { + if a.val.0[14usize] != 0 { b[14usize] } else { c[14usize] }, - if a[15usize] != 0 { + if a.val.0[15usize] != 0 { b[15usize] } else { c[15usize] @@ -1585,42 +1641,82 @@ impl Simd for Fallback { #[inline(always)] fn select_u8x16(self, a: mask8x16, b: u8x16, c: u8x16) -> u8x16 { [ - if a[0usize] != 0 { b[0usize] } else { c[0usize] }, - if a[1usize] != 0 { b[1usize] } else { c[1usize] }, - if a[2usize] != 0 { b[2usize] } else { c[2usize] }, - if a[3usize] != 0 { b[3usize] } else { c[3usize] }, - if a[4usize] != 0 { b[4usize] } else { c[4usize] }, - if a[5usize] != 0 { b[5usize] } else { c[5usize] }, - if a[6usize] != 0 { b[6usize] } else { c[6usize] }, - if a[7usize] != 0 { b[7usize] } else { c[7usize] }, - if a[8usize] != 0 { b[8usize] } else { c[8usize] }, - if a[9usize] != 0 { b[9usize] } else { c[9usize] }, - if a[10usize] != 0 { + if a.val.0[0usize] != 0 { + b[0usize] + } else { + c[0usize] + }, + if a.val.0[1usize] != 0 { + b[1usize] + } else { + c[1usize] + }, + if a.val.0[2usize] != 0 { + b[2usize] + } else { + c[2usize] + }, + if a.val.0[3usize] != 0 { + b[3usize] + } else { + c[3usize] + }, + if a.val.0[4usize] != 0 { + b[4usize] + } else { + c[4usize] + }, + if a.val.0[5usize] != 0 { + b[5usize] + } else { + c[5usize] + }, + if a.val.0[6usize] != 0 { + b[6usize] + } else { + c[6usize] + }, + if a.val.0[7usize] != 0 { + b[7usize] + } else { + c[7usize] + }, + if a.val.0[8usize] != 0 { + b[8usize] + } else { + c[8usize] + }, + if a.val.0[9usize] != 0 { + b[9usize] + } else { + c[9usize] + }, + if a.val.0[10usize] != 0 { b[10usize] } else { c[10usize] }, - if a[11usize] != 0 { + if a.val.0[11usize] != 0 { b[11usize] } else { c[11usize] }, - if a[12usize] != 0 { + if a.val.0[12usize] != 0 { b[12usize] } else { c[12usize] }, - if a[13usize] != 0 { + if a.val.0[13usize] != 0 { b[13usize] } else { c[13usize] }, - if a[14usize] != 0 { + if a.val.0[14usize] != 0 { b[14usize] } else { c[14usize] }, - if a[15usize] != 0 { + if a.val.0[15usize] != 0 { b[15usize] } else { c[15usize] @@ -1706,7 +1802,8 @@ impl Simd for Fallback { a.bitcast() } #[inline(always)] - fn splat_mask8x16(self, val: i8) -> mask8x16 { + fn splat_mask8x16(self, val: bool) -> mask8x16 { + let val: i8 = if val { !0 } else { 0 }; [val; 16usize].simd_into(self) } #[inline(always)] @@ -1717,150 +1814,94 @@ impl Simd for Fallback { } } #[inline(always)] - fn load_array_ref_mask8x16(self, val: &[i8; 16usize]) -> mask8x16 { - mask8x16 { - val: crate::support::Aligned128(*val), - simd: self, - } - } - #[inline(always)] fn as_array_mask8x16(self, a: mask8x16) -> [i8; 16usize] { a.val.0 } #[inline(always)] - fn as_array_ref_mask8x16(self, a: &mask8x16) -> &[i8; 16usize] { - &a.val.0 - } - #[inline(always)] - fn as_array_mut_mask8x16(self, a: &mut mask8x16) -> &mut [i8; 16usize] { - &mut a.val.0 - } - #[inline(always)] - fn store_array_mask8x16(self, a: mask8x16, dest: &mut [i8; 16usize]) -> () { - *dest = a.val.0; - } - #[inline(always)] - fn cvt_from_bytes_mask8x16(self, a: u8x16) -> mask8x16 { - unsafe { - mask8x16 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask8x16(self, a: mask8x16) -> u8x16 { - unsafe { - u8x16 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask8x16( - self, - a: mask8x16, - b: mask8x16, - ) -> mask8x16 { - let mut dest = [Default::default(); 16usize]; - dest[..16usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]); - dest[16usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]); - dest.simd_into(self) - } - #[inline(always)] - fn slide_within_blocks_mask8x16( - self, - a: mask8x16, - b: mask8x16, - ) -> mask8x16 { - self.slide_mask8x16::(a, b) - } - #[inline(always)] fn and_mask8x16(self, a: mask8x16, b: mask8x16) -> mask8x16 { [ - i8::bitand(a[0usize], &b[0usize]), - i8::bitand(a[1usize], &b[1usize]), - i8::bitand(a[2usize], &b[2usize]), - i8::bitand(a[3usize], &b[3usize]), - i8::bitand(a[4usize], &b[4usize]), - i8::bitand(a[5usize], &b[5usize]), - i8::bitand(a[6usize], &b[6usize]), - i8::bitand(a[7usize], &b[7usize]), - i8::bitand(a[8usize], &b[8usize]), - i8::bitand(a[9usize], &b[9usize]), - i8::bitand(a[10usize], &b[10usize]), - i8::bitand(a[11usize], &b[11usize]), - i8::bitand(a[12usize], &b[12usize]), - i8::bitand(a[13usize], &b[13usize]), - i8::bitand(a[14usize], &b[14usize]), - i8::bitand(a[15usize], &b[15usize]), + i8::bitand(a.val.0[0usize], &b.val.0[0usize]), + i8::bitand(a.val.0[1usize], &b.val.0[1usize]), + i8::bitand(a.val.0[2usize], &b.val.0[2usize]), + i8::bitand(a.val.0[3usize], &b.val.0[3usize]), + i8::bitand(a.val.0[4usize], &b.val.0[4usize]), + i8::bitand(a.val.0[5usize], &b.val.0[5usize]), + i8::bitand(a.val.0[6usize], &b.val.0[6usize]), + i8::bitand(a.val.0[7usize], &b.val.0[7usize]), + i8::bitand(a.val.0[8usize], &b.val.0[8usize]), + i8::bitand(a.val.0[9usize], &b.val.0[9usize]), + i8::bitand(a.val.0[10usize], &b.val.0[10usize]), + i8::bitand(a.val.0[11usize], &b.val.0[11usize]), + i8::bitand(a.val.0[12usize], &b.val.0[12usize]), + i8::bitand(a.val.0[13usize], &b.val.0[13usize]), + i8::bitand(a.val.0[14usize], &b.val.0[14usize]), + i8::bitand(a.val.0[15usize], &b.val.0[15usize]), ] .simd_into(self) } #[inline(always)] fn or_mask8x16(self, a: mask8x16, b: mask8x16) -> mask8x16 { [ - i8::bitor(a[0usize], &b[0usize]), - i8::bitor(a[1usize], &b[1usize]), - i8::bitor(a[2usize], &b[2usize]), - i8::bitor(a[3usize], &b[3usize]), - i8::bitor(a[4usize], &b[4usize]), - i8::bitor(a[5usize], &b[5usize]), - i8::bitor(a[6usize], &b[6usize]), - i8::bitor(a[7usize], &b[7usize]), - i8::bitor(a[8usize], &b[8usize]), - i8::bitor(a[9usize], &b[9usize]), - i8::bitor(a[10usize], &b[10usize]), - i8::bitor(a[11usize], &b[11usize]), - i8::bitor(a[12usize], &b[12usize]), - i8::bitor(a[13usize], &b[13usize]), - i8::bitor(a[14usize], &b[14usize]), - i8::bitor(a[15usize], &b[15usize]), + i8::bitor(a.val.0[0usize], &b.val.0[0usize]), + i8::bitor(a.val.0[1usize], &b.val.0[1usize]), + i8::bitor(a.val.0[2usize], &b.val.0[2usize]), + i8::bitor(a.val.0[3usize], &b.val.0[3usize]), + i8::bitor(a.val.0[4usize], &b.val.0[4usize]), + i8::bitor(a.val.0[5usize], &b.val.0[5usize]), + i8::bitor(a.val.0[6usize], &b.val.0[6usize]), + i8::bitor(a.val.0[7usize], &b.val.0[7usize]), + i8::bitor(a.val.0[8usize], &b.val.0[8usize]), + i8::bitor(a.val.0[9usize], &b.val.0[9usize]), + i8::bitor(a.val.0[10usize], &b.val.0[10usize]), + i8::bitor(a.val.0[11usize], &b.val.0[11usize]), + i8::bitor(a.val.0[12usize], &b.val.0[12usize]), + i8::bitor(a.val.0[13usize], &b.val.0[13usize]), + i8::bitor(a.val.0[14usize], &b.val.0[14usize]), + i8::bitor(a.val.0[15usize], &b.val.0[15usize]), ] .simd_into(self) } #[inline(always)] fn xor_mask8x16(self, a: mask8x16, b: mask8x16) -> mask8x16 { [ - i8::bitxor(a[0usize], &b[0usize]), - i8::bitxor(a[1usize], &b[1usize]), - i8::bitxor(a[2usize], &b[2usize]), - i8::bitxor(a[3usize], &b[3usize]), - i8::bitxor(a[4usize], &b[4usize]), - i8::bitxor(a[5usize], &b[5usize]), - i8::bitxor(a[6usize], &b[6usize]), - i8::bitxor(a[7usize], &b[7usize]), - i8::bitxor(a[8usize], &b[8usize]), - i8::bitxor(a[9usize], &b[9usize]), - i8::bitxor(a[10usize], &b[10usize]), - i8::bitxor(a[11usize], &b[11usize]), - i8::bitxor(a[12usize], &b[12usize]), - i8::bitxor(a[13usize], &b[13usize]), - i8::bitxor(a[14usize], &b[14usize]), - i8::bitxor(a[15usize], &b[15usize]), + i8::bitxor(a.val.0[0usize], &b.val.0[0usize]), + i8::bitxor(a.val.0[1usize], &b.val.0[1usize]), + i8::bitxor(a.val.0[2usize], &b.val.0[2usize]), + i8::bitxor(a.val.0[3usize], &b.val.0[3usize]), + i8::bitxor(a.val.0[4usize], &b.val.0[4usize]), + i8::bitxor(a.val.0[5usize], &b.val.0[5usize]), + i8::bitxor(a.val.0[6usize], &b.val.0[6usize]), + i8::bitxor(a.val.0[7usize], &b.val.0[7usize]), + i8::bitxor(a.val.0[8usize], &b.val.0[8usize]), + i8::bitxor(a.val.0[9usize], &b.val.0[9usize]), + i8::bitxor(a.val.0[10usize], &b.val.0[10usize]), + i8::bitxor(a.val.0[11usize], &b.val.0[11usize]), + i8::bitxor(a.val.0[12usize], &b.val.0[12usize]), + i8::bitxor(a.val.0[13usize], &b.val.0[13usize]), + i8::bitxor(a.val.0[14usize], &b.val.0[14usize]), + i8::bitxor(a.val.0[15usize], &b.val.0[15usize]), ] .simd_into(self) } #[inline(always)] fn not_mask8x16(self, a: mask8x16) -> mask8x16 { [ - i8::not(a[0usize]), - i8::not(a[1usize]), - i8::not(a[2usize]), - i8::not(a[3usize]), - i8::not(a[4usize]), - i8::not(a[5usize]), - i8::not(a[6usize]), - i8::not(a[7usize]), - i8::not(a[8usize]), - i8::not(a[9usize]), - i8::not(a[10usize]), - i8::not(a[11usize]), - i8::not(a[12usize]), - i8::not(a[13usize]), - i8::not(a[14usize]), - i8::not(a[15usize]), + i8::not(a.val.0[0usize]), + i8::not(a.val.0[1usize]), + i8::not(a.val.0[2usize]), + i8::not(a.val.0[3usize]), + i8::not(a.val.0[4usize]), + i8::not(a.val.0[5usize]), + i8::not(a.val.0[6usize]), + i8::not(a.val.0[7usize]), + i8::not(a.val.0[8usize]), + i8::not(a.val.0[9usize]), + i8::not(a.val.0[10usize]), + i8::not(a.val.0[11usize]), + i8::not(a.val.0[12usize]), + i8::not(a.val.0[13usize]), + i8::not(a.val.0[14usize]), + i8::not(a.val.0[15usize]), ] .simd_into(self) } @@ -1872,45 +1913,85 @@ impl Simd for Fallback { c: mask8x16, ) -> mask8x16 { [ - if a[0usize] != 0 { b[0usize] } else { c[0usize] }, - if a[1usize] != 0 { b[1usize] } else { c[1usize] }, - if a[2usize] != 0 { b[2usize] } else { c[2usize] }, - if a[3usize] != 0 { b[3usize] } else { c[3usize] }, - if a[4usize] != 0 { b[4usize] } else { c[4usize] }, - if a[5usize] != 0 { b[5usize] } else { c[5usize] }, - if a[6usize] != 0 { b[6usize] } else { c[6usize] }, - if a[7usize] != 0 { b[7usize] } else { c[7usize] }, - if a[8usize] != 0 { b[8usize] } else { c[8usize] }, - if a[9usize] != 0 { b[9usize] } else { c[9usize] }, - if a[10usize] != 0 { - b[10usize] + if a.val.0[0usize] != 0 { + b.val.0[0usize] } else { - c[10usize] + c.val.0[0usize] }, - if a[11usize] != 0 { - b[11usize] + if a.val.0[1usize] != 0 { + b.val.0[1usize] } else { - c[11usize] + c.val.0[1usize] }, - if a[12usize] != 0 { - b[12usize] + if a.val.0[2usize] != 0 { + b.val.0[2usize] } else { - c[12usize] + c.val.0[2usize] }, - if a[13usize] != 0 { - b[13usize] + if a.val.0[3usize] != 0 { + b.val.0[3usize] } else { - c[13usize] + c.val.0[3usize] }, - if a[14usize] != 0 { - b[14usize] + if a.val.0[4usize] != 0 { + b.val.0[4usize] } else { - c[14usize] + c.val.0[4usize] }, - if a[15usize] != 0 { - b[15usize] + if a.val.0[5usize] != 0 { + b.val.0[5usize] } else { - c[15usize] + c.val.0[5usize] + }, + if a.val.0[6usize] != 0 { + b.val.0[6usize] + } else { + c.val.0[6usize] + }, + if a.val.0[7usize] != 0 { + b.val.0[7usize] + } else { + c.val.0[7usize] + }, + if a.val.0[8usize] != 0 { + b.val.0[8usize] + } else { + c.val.0[8usize] + }, + if a.val.0[9usize] != 0 { + b.val.0[9usize] + } else { + c.val.0[9usize] + }, + if a.val.0[10usize] != 0 { + b.val.0[10usize] + } else { + c.val.0[10usize] + }, + if a.val.0[11usize] != 0 { + b.val.0[11usize] + } else { + c.val.0[11usize] + }, + if a.val.0[12usize] != 0 { + b.val.0[12usize] + } else { + c.val.0[12usize] + }, + if a.val.0[13usize] != 0 { + b.val.0[13usize] + } else { + c.val.0[13usize] + }, + if a.val.0[14usize] != 0 { + b.val.0[14usize] + } else { + c.val.0[14usize] + }, + if a.val.0[15usize] != 0 { + b.val.0[15usize] + } else { + c.val.0[15usize] }, ] .simd_into(self) @@ -1918,100 +1999,100 @@ impl Simd for Fallback { #[inline(always)] fn simd_eq_mask8x16(self, a: mask8x16, b: mask8x16) -> mask8x16 { [ - -(i8::eq(&a[0usize], &b[0usize]) as i8), - -(i8::eq(&a[1usize], &b[1usize]) as i8), - -(i8::eq(&a[2usize], &b[2usize]) as i8), - -(i8::eq(&a[3usize], &b[3usize]) as i8), - -(i8::eq(&a[4usize], &b[4usize]) as i8), - -(i8::eq(&a[5usize], &b[5usize]) as i8), - -(i8::eq(&a[6usize], &b[6usize]) as i8), - -(i8::eq(&a[7usize], &b[7usize]) as i8), - -(i8::eq(&a[8usize], &b[8usize]) as i8), - -(i8::eq(&a[9usize], &b[9usize]) as i8), - -(i8::eq(&a[10usize], &b[10usize]) as i8), - -(i8::eq(&a[11usize], &b[11usize]) as i8), - -(i8::eq(&a[12usize], &b[12usize]) as i8), - -(i8::eq(&a[13usize], &b[13usize]) as i8), - -(i8::eq(&a[14usize], &b[14usize]) as i8), - -(i8::eq(&a[15usize], &b[15usize]) as i8), + -(i8::eq(&a.val.0[0usize], &b.val.0[0usize]) as i8), + -(i8::eq(&a.val.0[1usize], &b.val.0[1usize]) as i8), + -(i8::eq(&a.val.0[2usize], &b.val.0[2usize]) as i8), + -(i8::eq(&a.val.0[3usize], &b.val.0[3usize]) as i8), + -(i8::eq(&a.val.0[4usize], &b.val.0[4usize]) as i8), + -(i8::eq(&a.val.0[5usize], &b.val.0[5usize]) as i8), + -(i8::eq(&a.val.0[6usize], &b.val.0[6usize]) as i8), + -(i8::eq(&a.val.0[7usize], &b.val.0[7usize]) as i8), + -(i8::eq(&a.val.0[8usize], &b.val.0[8usize]) as i8), + -(i8::eq(&a.val.0[9usize], &b.val.0[9usize]) as i8), + -(i8::eq(&a.val.0[10usize], &b.val.0[10usize]) as i8), + -(i8::eq(&a.val.0[11usize], &b.val.0[11usize]) as i8), + -(i8::eq(&a.val.0[12usize], &b.val.0[12usize]) as i8), + -(i8::eq(&a.val.0[13usize], &b.val.0[13usize]) as i8), + -(i8::eq(&a.val.0[14usize], &b.val.0[14usize]) as i8), + -(i8::eq(&a.val.0[15usize], &b.val.0[15usize]) as i8), ] .simd_into(self) } #[inline(always)] fn any_true_mask8x16(self, a: mask8x16) -> bool { - a[0usize] != 0 - || a[1usize] != 0 - || a[2usize] != 0 - || a[3usize] != 0 - || a[4usize] != 0 - || a[5usize] != 0 - || a[6usize] != 0 - || a[7usize] != 0 - || a[8usize] != 0 - || a[9usize] != 0 - || a[10usize] != 0 - || a[11usize] != 0 - || a[12usize] != 0 - || a[13usize] != 0 - || a[14usize] != 0 - || a[15usize] != 0 + a.val.0[0usize] != 0 + || a.val.0[1usize] != 0 + || a.val.0[2usize] != 0 + || a.val.0[3usize] != 0 + || a.val.0[4usize] != 0 + || a.val.0[5usize] != 0 + || a.val.0[6usize] != 0 + || a.val.0[7usize] != 0 + || a.val.0[8usize] != 0 + || a.val.0[9usize] != 0 + || a.val.0[10usize] != 0 + || a.val.0[11usize] != 0 + || a.val.0[12usize] != 0 + || a.val.0[13usize] != 0 + || a.val.0[14usize] != 0 + || a.val.0[15usize] != 0 } #[inline(always)] fn all_true_mask8x16(self, a: mask8x16) -> bool { - a[0usize] != 0 - && a[1usize] != 0 - && a[2usize] != 0 - && a[3usize] != 0 - && a[4usize] != 0 - && a[5usize] != 0 - && a[6usize] != 0 - && a[7usize] != 0 - && a[8usize] != 0 - && a[9usize] != 0 - && a[10usize] != 0 - && a[11usize] != 0 - && a[12usize] != 0 - && a[13usize] != 0 - && a[14usize] != 0 - && a[15usize] != 0 + a.val.0[0usize] != 0 + && a.val.0[1usize] != 0 + && a.val.0[2usize] != 0 + && a.val.0[3usize] != 0 + && a.val.0[4usize] != 0 + && a.val.0[5usize] != 0 + && a.val.0[6usize] != 0 + && a.val.0[7usize] != 0 + && a.val.0[8usize] != 0 + && a.val.0[9usize] != 0 + && a.val.0[10usize] != 0 + && a.val.0[11usize] != 0 + && a.val.0[12usize] != 0 + && a.val.0[13usize] != 0 + && a.val.0[14usize] != 0 + && a.val.0[15usize] != 0 } #[inline(always)] fn any_false_mask8x16(self, a: mask8x16) -> bool { - a[0usize] == 0 - || a[1usize] == 0 - || a[2usize] == 0 - || a[3usize] == 0 - || a[4usize] == 0 - || a[5usize] == 0 - || a[6usize] == 0 - || a[7usize] == 0 - || a[8usize] == 0 - || a[9usize] == 0 - || a[10usize] == 0 - || a[11usize] == 0 - || a[12usize] == 0 - || a[13usize] == 0 - || a[14usize] == 0 - || a[15usize] == 0 + a.val.0[0usize] == 0 + || a.val.0[1usize] == 0 + || a.val.0[2usize] == 0 + || a.val.0[3usize] == 0 + || a.val.0[4usize] == 0 + || a.val.0[5usize] == 0 + || a.val.0[6usize] == 0 + || a.val.0[7usize] == 0 + || a.val.0[8usize] == 0 + || a.val.0[9usize] == 0 + || a.val.0[10usize] == 0 + || a.val.0[11usize] == 0 + || a.val.0[12usize] == 0 + || a.val.0[13usize] == 0 + || a.val.0[14usize] == 0 + || a.val.0[15usize] == 0 } #[inline(always)] fn all_false_mask8x16(self, a: mask8x16) -> bool { - a[0usize] == 0 - && a[1usize] == 0 - && a[2usize] == 0 - && a[3usize] == 0 - && a[4usize] == 0 - && a[5usize] == 0 - && a[6usize] == 0 - && a[7usize] == 0 - && a[8usize] == 0 - && a[9usize] == 0 - && a[10usize] == 0 - && a[11usize] == 0 - && a[12usize] == 0 - && a[13usize] == 0 - && a[14usize] == 0 - && a[15usize] == 0 + a.val.0[0usize] == 0 + && a.val.0[1usize] == 0 + && a.val.0[2usize] == 0 + && a.val.0[3usize] == 0 + && a.val.0[4usize] == 0 + && a.val.0[5usize] == 0 + && a.val.0[6usize] == 0 + && a.val.0[7usize] == 0 + && a.val.0[8usize] == 0 + && a.val.0[9usize] == 0 + && a.val.0[10usize] == 0 + && a.val.0[11usize] == 0 + && a.val.0[12usize] == 0 + && a.val.0[13usize] == 0 + && a.val.0[14usize] == 0 + && a.val.0[15usize] == 0 } #[inline(always)] fn combine_mask8x16(self, a: mask8x16, b: mask8x16) -> mask8x32 { @@ -2350,14 +2431,46 @@ impl Simd for Fallback { #[inline(always)] fn select_i16x8(self, a: mask16x8, b: i16x8, c: i16x8) -> i16x8 { [ - if a[0usize] != 0 { b[0usize] } else { c[0usize] }, - if a[1usize] != 0 { b[1usize] } else { c[1usize] }, - if a[2usize] != 0 { b[2usize] } else { c[2usize] }, - if a[3usize] != 0 { b[3usize] } else { c[3usize] }, - if a[4usize] != 0 { b[4usize] } else { c[4usize] }, - if a[5usize] != 0 { b[5usize] } else { c[5usize] }, - if a[6usize] != 0 { b[6usize] } else { c[6usize] }, - if a[7usize] != 0 { b[7usize] } else { c[7usize] }, + if a.val.0[0usize] != 0 { + b[0usize] + } else { + c[0usize] + }, + if a.val.0[1usize] != 0 { + b[1usize] + } else { + c[1usize] + }, + if a.val.0[2usize] != 0 { + b[2usize] + } else { + c[2usize] + }, + if a.val.0[3usize] != 0 { + b[3usize] + } else { + c[3usize] + }, + if a.val.0[4usize] != 0 { + b[4usize] + } else { + c[4usize] + }, + if a.val.0[5usize] != 0 { + b[5usize] + } else { + c[5usize] + }, + if a.val.0[6usize] != 0 { + b[6usize] + } else { + c[6usize] + }, + if a.val.0[7usize] != 0 { + b[7usize] + } else { + c[7usize] + }, ] .simd_into(self) } @@ -2748,14 +2861,46 @@ impl Simd for Fallback { #[inline(always)] fn select_u16x8(self, a: mask16x8, b: u16x8, c: u16x8) -> u16x8 { [ - if a[0usize] != 0 { b[0usize] } else { c[0usize] }, - if a[1usize] != 0 { b[1usize] } else { c[1usize] }, - if a[2usize] != 0 { b[2usize] } else { c[2usize] }, - if a[3usize] != 0 { b[3usize] } else { c[3usize] }, - if a[4usize] != 0 { b[4usize] } else { c[4usize] }, - if a[5usize] != 0 { b[5usize] } else { c[5usize] }, - if a[6usize] != 0 { b[6usize] } else { c[6usize] }, - if a[7usize] != 0 { b[7usize] } else { c[7usize] }, + if a.val.0[0usize] != 0 { + b[0usize] + } else { + c[0usize] + }, + if a.val.0[1usize] != 0 { + b[1usize] + } else { + c[1usize] + }, + if a.val.0[2usize] != 0 { + b[2usize] + } else { + c[2usize] + }, + if a.val.0[3usize] != 0 { + b[3usize] + } else { + c[3usize] + }, + if a.val.0[4usize] != 0 { + b[4usize] + } else { + c[4usize] + }, + if a.val.0[5usize] != 0 { + b[5usize] + } else { + c[5usize] + }, + if a.val.0[6usize] != 0 { + b[6usize] + } else { + c[6usize] + }, + if a.val.0[7usize] != 0 { + b[7usize] + } else { + c[7usize] + }, ] .simd_into(self) } @@ -2803,7 +2948,8 @@ impl Simd for Fallback { a.bitcast() } #[inline(always)] - fn splat_mask16x8(self, val: i16) -> mask16x8 { + fn splat_mask16x8(self, val: bool) -> mask16x8 { + let val: i16 = if val { !0 } else { 0 }; [val; 8usize].simd_into(self) } #[inline(always)] @@ -2814,118 +2960,62 @@ impl Simd for Fallback { } } #[inline(always)] - fn load_array_ref_mask16x8(self, val: &[i16; 8usize]) -> mask16x8 { - mask16x8 { - val: crate::support::Aligned128(*val), - simd: self, - } - } - #[inline(always)] fn as_array_mask16x8(self, a: mask16x8) -> [i16; 8usize] { a.val.0 } #[inline(always)] - fn as_array_ref_mask16x8(self, a: &mask16x8) -> &[i16; 8usize] { - &a.val.0 - } - #[inline(always)] - fn as_array_mut_mask16x8(self, a: &mut mask16x8) -> &mut [i16; 8usize] { - &mut a.val.0 - } - #[inline(always)] - fn store_array_mask16x8(self, a: mask16x8, dest: &mut [i16; 8usize]) -> () { - *dest = a.val.0; - } - #[inline(always)] - fn cvt_from_bytes_mask16x8(self, a: u8x16) -> mask16x8 { - unsafe { - mask16x8 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask16x8(self, a: mask16x8) -> u8x16 { - unsafe { - u8x16 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask16x8( - self, - a: mask16x8, - b: mask16x8, - ) -> mask16x8 { - let mut dest = [Default::default(); 8usize]; - dest[..8usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]); - dest[8usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]); - dest.simd_into(self) - } - #[inline(always)] - fn slide_within_blocks_mask16x8( - self, - a: mask16x8, - b: mask16x8, - ) -> mask16x8 { - self.slide_mask16x8::(a, b) - } - #[inline(always)] fn and_mask16x8(self, a: mask16x8, b: mask16x8) -> mask16x8 { [ - i16::bitand(a[0usize], &b[0usize]), - i16::bitand(a[1usize], &b[1usize]), - i16::bitand(a[2usize], &b[2usize]), - i16::bitand(a[3usize], &b[3usize]), - i16::bitand(a[4usize], &b[4usize]), - i16::bitand(a[5usize], &b[5usize]), - i16::bitand(a[6usize], &b[6usize]), - i16::bitand(a[7usize], &b[7usize]), + i16::bitand(a.val.0[0usize], &b.val.0[0usize]), + i16::bitand(a.val.0[1usize], &b.val.0[1usize]), + i16::bitand(a.val.0[2usize], &b.val.0[2usize]), + i16::bitand(a.val.0[3usize], &b.val.0[3usize]), + i16::bitand(a.val.0[4usize], &b.val.0[4usize]), + i16::bitand(a.val.0[5usize], &b.val.0[5usize]), + i16::bitand(a.val.0[6usize], &b.val.0[6usize]), + i16::bitand(a.val.0[7usize], &b.val.0[7usize]), ] .simd_into(self) } #[inline(always)] fn or_mask16x8(self, a: mask16x8, b: mask16x8) -> mask16x8 { [ - i16::bitor(a[0usize], &b[0usize]), - i16::bitor(a[1usize], &b[1usize]), - i16::bitor(a[2usize], &b[2usize]), - i16::bitor(a[3usize], &b[3usize]), - i16::bitor(a[4usize], &b[4usize]), - i16::bitor(a[5usize], &b[5usize]), - i16::bitor(a[6usize], &b[6usize]), - i16::bitor(a[7usize], &b[7usize]), + i16::bitor(a.val.0[0usize], &b.val.0[0usize]), + i16::bitor(a.val.0[1usize], &b.val.0[1usize]), + i16::bitor(a.val.0[2usize], &b.val.0[2usize]), + i16::bitor(a.val.0[3usize], &b.val.0[3usize]), + i16::bitor(a.val.0[4usize], &b.val.0[4usize]), + i16::bitor(a.val.0[5usize], &b.val.0[5usize]), + i16::bitor(a.val.0[6usize], &b.val.0[6usize]), + i16::bitor(a.val.0[7usize], &b.val.0[7usize]), ] .simd_into(self) } #[inline(always)] fn xor_mask16x8(self, a: mask16x8, b: mask16x8) -> mask16x8 { [ - i16::bitxor(a[0usize], &b[0usize]), - i16::bitxor(a[1usize], &b[1usize]), - i16::bitxor(a[2usize], &b[2usize]), - i16::bitxor(a[3usize], &b[3usize]), - i16::bitxor(a[4usize], &b[4usize]), - i16::bitxor(a[5usize], &b[5usize]), - i16::bitxor(a[6usize], &b[6usize]), - i16::bitxor(a[7usize], &b[7usize]), + i16::bitxor(a.val.0[0usize], &b.val.0[0usize]), + i16::bitxor(a.val.0[1usize], &b.val.0[1usize]), + i16::bitxor(a.val.0[2usize], &b.val.0[2usize]), + i16::bitxor(a.val.0[3usize], &b.val.0[3usize]), + i16::bitxor(a.val.0[4usize], &b.val.0[4usize]), + i16::bitxor(a.val.0[5usize], &b.val.0[5usize]), + i16::bitxor(a.val.0[6usize], &b.val.0[6usize]), + i16::bitxor(a.val.0[7usize], &b.val.0[7usize]), ] .simd_into(self) } #[inline(always)] fn not_mask16x8(self, a: mask16x8) -> mask16x8 { [ - i16::not(a[0usize]), - i16::not(a[1usize]), - i16::not(a[2usize]), - i16::not(a[3usize]), - i16::not(a[4usize]), - i16::not(a[5usize]), - i16::not(a[6usize]), - i16::not(a[7usize]), + i16::not(a.val.0[0usize]), + i16::not(a.val.0[1usize]), + i16::not(a.val.0[2usize]), + i16::not(a.val.0[3usize]), + i16::not(a.val.0[4usize]), + i16::not(a.val.0[5usize]), + i16::not(a.val.0[6usize]), + i16::not(a.val.0[7usize]), ] .simd_into(self) } @@ -2937,74 +3027,106 @@ impl Simd for Fallback { c: mask16x8, ) -> mask16x8 { [ - if a[0usize] != 0 { b[0usize] } else { c[0usize] }, - if a[1usize] != 0 { b[1usize] } else { c[1usize] }, - if a[2usize] != 0 { b[2usize] } else { c[2usize] }, - if a[3usize] != 0 { b[3usize] } else { c[3usize] }, - if a[4usize] != 0 { b[4usize] } else { c[4usize] }, - if a[5usize] != 0 { b[5usize] } else { c[5usize] }, - if a[6usize] != 0 { b[6usize] } else { c[6usize] }, - if a[7usize] != 0 { b[7usize] } else { c[7usize] }, + if a.val.0[0usize] != 0 { + b.val.0[0usize] + } else { + c.val.0[0usize] + }, + if a.val.0[1usize] != 0 { + b.val.0[1usize] + } else { + c.val.0[1usize] + }, + if a.val.0[2usize] != 0 { + b.val.0[2usize] + } else { + c.val.0[2usize] + }, + if a.val.0[3usize] != 0 { + b.val.0[3usize] + } else { + c.val.0[3usize] + }, + if a.val.0[4usize] != 0 { + b.val.0[4usize] + } else { + c.val.0[4usize] + }, + if a.val.0[5usize] != 0 { + b.val.0[5usize] + } else { + c.val.0[5usize] + }, + if a.val.0[6usize] != 0 { + b.val.0[6usize] + } else { + c.val.0[6usize] + }, + if a.val.0[7usize] != 0 { + b.val.0[7usize] + } else { + c.val.0[7usize] + }, ] .simd_into(self) } #[inline(always)] fn simd_eq_mask16x8(self, a: mask16x8, b: mask16x8) -> mask16x8 { [ - -(i16::eq(&a[0usize], &b[0usize]) as i16), - -(i16::eq(&a[1usize], &b[1usize]) as i16), - -(i16::eq(&a[2usize], &b[2usize]) as i16), - -(i16::eq(&a[3usize], &b[3usize]) as i16), - -(i16::eq(&a[4usize], &b[4usize]) as i16), - -(i16::eq(&a[5usize], &b[5usize]) as i16), - -(i16::eq(&a[6usize], &b[6usize]) as i16), - -(i16::eq(&a[7usize], &b[7usize]) as i16), + -(i16::eq(&a.val.0[0usize], &b.val.0[0usize]) as i16), + -(i16::eq(&a.val.0[1usize], &b.val.0[1usize]) as i16), + -(i16::eq(&a.val.0[2usize], &b.val.0[2usize]) as i16), + -(i16::eq(&a.val.0[3usize], &b.val.0[3usize]) as i16), + -(i16::eq(&a.val.0[4usize], &b.val.0[4usize]) as i16), + -(i16::eq(&a.val.0[5usize], &b.val.0[5usize]) as i16), + -(i16::eq(&a.val.0[6usize], &b.val.0[6usize]) as i16), + -(i16::eq(&a.val.0[7usize], &b.val.0[7usize]) as i16), ] .simd_into(self) } #[inline(always)] fn any_true_mask16x8(self, a: mask16x8) -> bool { - a[0usize] != 0 - || a[1usize] != 0 - || a[2usize] != 0 - || a[3usize] != 0 - || a[4usize] != 0 - || a[5usize] != 0 - || a[6usize] != 0 - || a[7usize] != 0 + a.val.0[0usize] != 0 + || a.val.0[1usize] != 0 + || a.val.0[2usize] != 0 + || a.val.0[3usize] != 0 + || a.val.0[4usize] != 0 + || a.val.0[5usize] != 0 + || a.val.0[6usize] != 0 + || a.val.0[7usize] != 0 } #[inline(always)] fn all_true_mask16x8(self, a: mask16x8) -> bool { - a[0usize] != 0 - && a[1usize] != 0 - && a[2usize] != 0 - && a[3usize] != 0 - && a[4usize] != 0 - && a[5usize] != 0 - && a[6usize] != 0 - && a[7usize] != 0 + a.val.0[0usize] != 0 + && a.val.0[1usize] != 0 + && a.val.0[2usize] != 0 + && a.val.0[3usize] != 0 + && a.val.0[4usize] != 0 + && a.val.0[5usize] != 0 + && a.val.0[6usize] != 0 + && a.val.0[7usize] != 0 } #[inline(always)] fn any_false_mask16x8(self, a: mask16x8) -> bool { - a[0usize] == 0 - || a[1usize] == 0 - || a[2usize] == 0 - || a[3usize] == 0 - || a[4usize] == 0 - || a[5usize] == 0 - || a[6usize] == 0 - || a[7usize] == 0 + a.val.0[0usize] == 0 + || a.val.0[1usize] == 0 + || a.val.0[2usize] == 0 + || a.val.0[3usize] == 0 + || a.val.0[4usize] == 0 + || a.val.0[5usize] == 0 + || a.val.0[6usize] == 0 + || a.val.0[7usize] == 0 } #[inline(always)] fn all_false_mask16x8(self, a: mask16x8) -> bool { - a[0usize] == 0 - && a[1usize] == 0 - && a[2usize] == 0 - && a[3usize] == 0 - && a[4usize] == 0 - && a[5usize] == 0 - && a[6usize] == 0 - && a[7usize] == 0 + a.val.0[0usize] == 0 + && a.val.0[1usize] == 0 + && a.val.0[2usize] == 0 + && a.val.0[3usize] == 0 + && a.val.0[4usize] == 0 + && a.val.0[5usize] == 0 + && a.val.0[6usize] == 0 + && a.val.0[7usize] == 0 } #[inline(always)] fn combine_mask16x8(self, a: mask16x8, b: mask16x8) -> mask16x16 { @@ -3267,10 +3389,26 @@ impl Simd for Fallback { #[inline(always)] fn select_i32x4(self, a: mask32x4, b: i32x4, c: i32x4) -> i32x4 { [ - if a[0usize] != 0 { b[0usize] } else { c[0usize] }, - if a[1usize] != 0 { b[1usize] } else { c[1usize] }, - if a[2usize] != 0 { b[2usize] } else { c[2usize] }, - if a[3usize] != 0 { b[3usize] } else { c[3usize] }, + if a.val.0[0usize] != 0 { + b[0usize] + } else { + c[0usize] + }, + if a.val.0[1usize] != 0 { + b[1usize] + } else { + c[1usize] + }, + if a.val.0[2usize] != 0 { + b[2usize] + } else { + c[2usize] + }, + if a.val.0[3usize] != 0 { + b[3usize] + } else { + c[3usize] + }, ] .simd_into(self) } @@ -3583,10 +3721,26 @@ impl Simd for Fallback { #[inline(always)] fn select_u32x4(self, a: mask32x4, b: u32x4, c: u32x4) -> u32x4 { [ - if a[0usize] != 0 { b[0usize] } else { c[0usize] }, - if a[1usize] != 0 { b[1usize] } else { c[1usize] }, - if a[2usize] != 0 { b[2usize] } else { c[2usize] }, - if a[3usize] != 0 { b[3usize] } else { c[3usize] }, + if a.val.0[0usize] != 0 { + b[0usize] + } else { + c[0usize] + }, + if a.val.0[1usize] != 0 { + b[1usize] + } else { + c[1usize] + }, + if a.val.0[2usize] != 0 { + b[2usize] + } else { + c[2usize] + }, + if a.val.0[3usize] != 0 { + b[3usize] + } else { + c[3usize] + }, ] .simd_into(self) } @@ -3632,113 +3786,58 @@ impl Simd for Fallback { .simd_into(self) } #[inline(always)] - fn splat_mask32x4(self, val: i32) -> mask32x4 { + fn splat_mask32x4(self, val: bool) -> mask32x4 { + let val: i32 = if val { !0 } else { 0 }; [val; 4usize].simd_into(self) } #[inline(always)] fn load_array_mask32x4(self, val: [i32; 4usize]) -> mask32x4 { mask32x4 { - val: crate::support::Aligned128(val), - simd: self, - } - } - #[inline(always)] - fn load_array_ref_mask32x4(self, val: &[i32; 4usize]) -> mask32x4 { - mask32x4 { - val: crate::support::Aligned128(*val), - simd: self, - } - } - #[inline(always)] - fn as_array_mask32x4(self, a: mask32x4) -> [i32; 4usize] { - a.val.0 - } - #[inline(always)] - fn as_array_ref_mask32x4(self, a: &mask32x4) -> &[i32; 4usize] { - &a.val.0 - } - #[inline(always)] - fn as_array_mut_mask32x4(self, a: &mut mask32x4) -> &mut [i32; 4usize] { - &mut a.val.0 - } - #[inline(always)] - fn store_array_mask32x4(self, a: mask32x4, dest: &mut [i32; 4usize]) -> () { - *dest = a.val.0; - } - #[inline(always)] - fn cvt_from_bytes_mask32x4(self, a: u8x16) -> mask32x4 { - unsafe { - mask32x4 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask32x4(self, a: mask32x4) -> u8x16 { - unsafe { - u8x16 { - val: core::mem::transmute(a.val), - simd: self, - } + val: crate::support::Aligned128(val), + simd: self, } } #[inline(always)] - fn slide_mask32x4( - self, - a: mask32x4, - b: mask32x4, - ) -> mask32x4 { - let mut dest = [Default::default(); 4usize]; - dest[..4usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]); - dest[4usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]); - dest.simd_into(self) - } - #[inline(always)] - fn slide_within_blocks_mask32x4( - self, - a: mask32x4, - b: mask32x4, - ) -> mask32x4 { - self.slide_mask32x4::(a, b) + fn as_array_mask32x4(self, a: mask32x4) -> [i32; 4usize] { + a.val.0 } #[inline(always)] fn and_mask32x4(self, a: mask32x4, b: mask32x4) -> mask32x4 { [ - i32::bitand(a[0usize], &b[0usize]), - i32::bitand(a[1usize], &b[1usize]), - i32::bitand(a[2usize], &b[2usize]), - i32::bitand(a[3usize], &b[3usize]), + i32::bitand(a.val.0[0usize], &b.val.0[0usize]), + i32::bitand(a.val.0[1usize], &b.val.0[1usize]), + i32::bitand(a.val.0[2usize], &b.val.0[2usize]), + i32::bitand(a.val.0[3usize], &b.val.0[3usize]), ] .simd_into(self) } #[inline(always)] fn or_mask32x4(self, a: mask32x4, b: mask32x4) -> mask32x4 { [ - i32::bitor(a[0usize], &b[0usize]), - i32::bitor(a[1usize], &b[1usize]), - i32::bitor(a[2usize], &b[2usize]), - i32::bitor(a[3usize], &b[3usize]), + i32::bitor(a.val.0[0usize], &b.val.0[0usize]), + i32::bitor(a.val.0[1usize], &b.val.0[1usize]), + i32::bitor(a.val.0[2usize], &b.val.0[2usize]), + i32::bitor(a.val.0[3usize], &b.val.0[3usize]), ] .simd_into(self) } #[inline(always)] fn xor_mask32x4(self, a: mask32x4, b: mask32x4) -> mask32x4 { [ - i32::bitxor(a[0usize], &b[0usize]), - i32::bitxor(a[1usize], &b[1usize]), - i32::bitxor(a[2usize], &b[2usize]), - i32::bitxor(a[3usize], &b[3usize]), + i32::bitxor(a.val.0[0usize], &b.val.0[0usize]), + i32::bitxor(a.val.0[1usize], &b.val.0[1usize]), + i32::bitxor(a.val.0[2usize], &b.val.0[2usize]), + i32::bitxor(a.val.0[3usize], &b.val.0[3usize]), ] .simd_into(self) } #[inline(always)] fn not_mask32x4(self, a: mask32x4) -> mask32x4 { [ - i32::not(a[0usize]), - i32::not(a[1usize]), - i32::not(a[2usize]), - i32::not(a[3usize]), + i32::not(a.val.0[0usize]), + i32::not(a.val.0[1usize]), + i32::not(a.val.0[2usize]), + i32::not(a.val.0[3usize]), ] .simd_into(self) } @@ -3750,38 +3849,54 @@ impl Simd for Fallback { c: mask32x4, ) -> mask32x4 { [ - if a[0usize] != 0 { b[0usize] } else { c[0usize] }, - if a[1usize] != 0 { b[1usize] } else { c[1usize] }, - if a[2usize] != 0 { b[2usize] } else { c[2usize] }, - if a[3usize] != 0 { b[3usize] } else { c[3usize] }, + if a.val.0[0usize] != 0 { + b.val.0[0usize] + } else { + c.val.0[0usize] + }, + if a.val.0[1usize] != 0 { + b.val.0[1usize] + } else { + c.val.0[1usize] + }, + if a.val.0[2usize] != 0 { + b.val.0[2usize] + } else { + c.val.0[2usize] + }, + if a.val.0[3usize] != 0 { + b.val.0[3usize] + } else { + c.val.0[3usize] + }, ] .simd_into(self) } #[inline(always)] fn simd_eq_mask32x4(self, a: mask32x4, b: mask32x4) -> mask32x4 { [ - -(i32::eq(&a[0usize], &b[0usize]) as i32), - -(i32::eq(&a[1usize], &b[1usize]) as i32), - -(i32::eq(&a[2usize], &b[2usize]) as i32), - -(i32::eq(&a[3usize], &b[3usize]) as i32), + -(i32::eq(&a.val.0[0usize], &b.val.0[0usize]) as i32), + -(i32::eq(&a.val.0[1usize], &b.val.0[1usize]) as i32), + -(i32::eq(&a.val.0[2usize], &b.val.0[2usize]) as i32), + -(i32::eq(&a.val.0[3usize], &b.val.0[3usize]) as i32), ] .simd_into(self) } #[inline(always)] fn any_true_mask32x4(self, a: mask32x4) -> bool { - a[0usize] != 0 || a[1usize] != 0 || a[2usize] != 0 || a[3usize] != 0 + a.val.0[0usize] != 0 || a.val.0[1usize] != 0 || a.val.0[2usize] != 0 || a.val.0[3usize] != 0 } #[inline(always)] fn all_true_mask32x4(self, a: mask32x4) -> bool { - a[0usize] != 0 && a[1usize] != 0 && a[2usize] != 0 && a[3usize] != 0 + a.val.0[0usize] != 0 && a.val.0[1usize] != 0 && a.val.0[2usize] != 0 && a.val.0[3usize] != 0 } #[inline(always)] fn any_false_mask32x4(self, a: mask32x4) -> bool { - a[0usize] == 0 || a[1usize] == 0 || a[2usize] == 0 || a[3usize] == 0 + a.val.0[0usize] == 0 || a.val.0[1usize] == 0 || a.val.0[2usize] == 0 || a.val.0[3usize] == 0 } #[inline(always)] fn all_false_mask32x4(self, a: mask32x4) -> bool { - a[0usize] == 0 && a[1usize] == 0 && a[2usize] == 0 && a[3usize] == 0 + a.val.0[0usize] == 0 && a.val.0[1usize] == 0 && a.val.0[2usize] == 0 && a.val.0[3usize] == 0 } #[inline(always)] fn combine_mask32x4(self, a: mask32x4, b: mask32x4) -> mask32x8 { @@ -4040,8 +4155,16 @@ impl Simd for Fallback { #[inline(always)] fn select_f64x2(self, a: mask64x2, b: f64x2, c: f64x2) -> f64x2 { [ - if a[0usize] != 0 { b[0usize] } else { c[0usize] }, - if a[1usize] != 0 { b[1usize] } else { c[1usize] }, + if a.val.0[0usize] != 0 { + b[0usize] + } else { + c[0usize] + }, + if a.val.0[1usize] != 0 { + b[1usize] + } else { + c[1usize] + }, ] .simd_into(self) } @@ -4057,7 +4180,8 @@ impl Simd for Fallback { a.bitcast() } #[inline(always)] - fn splat_mask64x2(self, val: i64) -> mask64x2 { + fn splat_mask64x2(self, val: bool) -> mask64x2 { + let val: i64 = if val { !0 } else { 0 }; [val; 2usize].simd_into(self) } #[inline(always)] @@ -4068,92 +4192,36 @@ impl Simd for Fallback { } } #[inline(always)] - fn load_array_ref_mask64x2(self, val: &[i64; 2usize]) -> mask64x2 { - mask64x2 { - val: crate::support::Aligned128(*val), - simd: self, - } - } - #[inline(always)] fn as_array_mask64x2(self, a: mask64x2) -> [i64; 2usize] { a.val.0 } #[inline(always)] - fn as_array_ref_mask64x2(self, a: &mask64x2) -> &[i64; 2usize] { - &a.val.0 - } - #[inline(always)] - fn as_array_mut_mask64x2(self, a: &mut mask64x2) -> &mut [i64; 2usize] { - &mut a.val.0 - } - #[inline(always)] - fn store_array_mask64x2(self, a: mask64x2, dest: &mut [i64; 2usize]) -> () { - *dest = a.val.0; - } - #[inline(always)] - fn cvt_from_bytes_mask64x2(self, a: u8x16) -> mask64x2 { - unsafe { - mask64x2 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask64x2(self, a: mask64x2) -> u8x16 { - unsafe { - u8x16 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask64x2( - self, - a: mask64x2, - b: mask64x2, - ) -> mask64x2 { - let mut dest = [Default::default(); 2usize]; - dest[..2usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]); - dest[2usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]); - dest.simd_into(self) - } - #[inline(always)] - fn slide_within_blocks_mask64x2( - self, - a: mask64x2, - b: mask64x2, - ) -> mask64x2 { - self.slide_mask64x2::(a, b) - } - #[inline(always)] fn and_mask64x2(self, a: mask64x2, b: mask64x2) -> mask64x2 { [ - i64::bitand(a[0usize], &b[0usize]), - i64::bitand(a[1usize], &b[1usize]), + i64::bitand(a.val.0[0usize], &b.val.0[0usize]), + i64::bitand(a.val.0[1usize], &b.val.0[1usize]), ] .simd_into(self) } #[inline(always)] fn or_mask64x2(self, a: mask64x2, b: mask64x2) -> mask64x2 { [ - i64::bitor(a[0usize], &b[0usize]), - i64::bitor(a[1usize], &b[1usize]), + i64::bitor(a.val.0[0usize], &b.val.0[0usize]), + i64::bitor(a.val.0[1usize], &b.val.0[1usize]), ] .simd_into(self) } #[inline(always)] fn xor_mask64x2(self, a: mask64x2, b: mask64x2) -> mask64x2 { [ - i64::bitxor(a[0usize], &b[0usize]), - i64::bitxor(a[1usize], &b[1usize]), + i64::bitxor(a.val.0[0usize], &b.val.0[0usize]), + i64::bitxor(a.val.0[1usize], &b.val.0[1usize]), ] .simd_into(self) } #[inline(always)] fn not_mask64x2(self, a: mask64x2) -> mask64x2 { - [i64::not(a[0usize]), i64::not(a[1usize])].simd_into(self) + [i64::not(a.val.0[0usize]), i64::not(a.val.0[1usize])].simd_into(self) } #[inline(always)] fn select_mask64x2( @@ -4163,34 +4231,42 @@ impl Simd for Fallback { c: mask64x2, ) -> mask64x2 { [ - if a[0usize] != 0 { b[0usize] } else { c[0usize] }, - if a[1usize] != 0 { b[1usize] } else { c[1usize] }, + if a.val.0[0usize] != 0 { + b.val.0[0usize] + } else { + c.val.0[0usize] + }, + if a.val.0[1usize] != 0 { + b.val.0[1usize] + } else { + c.val.0[1usize] + }, ] .simd_into(self) } #[inline(always)] fn simd_eq_mask64x2(self, a: mask64x2, b: mask64x2) -> mask64x2 { [ - -(i64::eq(&a[0usize], &b[0usize]) as i64), - -(i64::eq(&a[1usize], &b[1usize]) as i64), + -(i64::eq(&a.val.0[0usize], &b.val.0[0usize]) as i64), + -(i64::eq(&a.val.0[1usize], &b.val.0[1usize]) as i64), ] .simd_into(self) } #[inline(always)] fn any_true_mask64x2(self, a: mask64x2) -> bool { - a[0usize] != 0 || a[1usize] != 0 + a.val.0[0usize] != 0 || a.val.0[1usize] != 0 } #[inline(always)] fn all_true_mask64x2(self, a: mask64x2) -> bool { - a[0usize] != 0 && a[1usize] != 0 + a.val.0[0usize] != 0 && a.val.0[1usize] != 0 } #[inline(always)] fn any_false_mask64x2(self, a: mask64x2) -> bool { - a[0usize] == 0 || a[1usize] == 0 + a.val.0[0usize] == 0 || a.val.0[1usize] == 0 } #[inline(always)] fn all_false_mask64x2(self, a: mask64x2) -> bool { - a[0usize] == 0 && a[1usize] == 0 + a.val.0[0usize] == 0 && a.val.0[1usize] == 0 } #[inline(always)] fn combine_mask64x2(self, a: mask64x2, b: mask64x2) -> mask64x4 { @@ -5084,7 +5160,7 @@ impl Simd for Fallback { ) } #[inline(always)] - fn splat_mask8x32(self, val: i8) -> mask8x32 { + fn splat_mask8x32(self, val: bool) -> mask8x32 { let half = self.splat_mask8x16(val); self.combine_mask8x16(half, half) } @@ -5096,71 +5172,10 @@ impl Simd for Fallback { } } #[inline(always)] - fn load_array_ref_mask8x32(self, val: &[i8; 32usize]) -> mask8x32 { - mask8x32 { - val: crate::support::Aligned256(*val), - simd: self, - } - } - #[inline(always)] fn as_array_mask8x32(self, a: mask8x32) -> [i8; 32usize] { a.val.0 } #[inline(always)] - fn as_array_ref_mask8x32(self, a: &mask8x32) -> &[i8; 32usize] { - &a.val.0 - } - #[inline(always)] - fn as_array_mut_mask8x32(self, a: &mut mask8x32) -> &mut [i8; 32usize] { - &mut a.val.0 - } - #[inline(always)] - fn store_array_mask8x32(self, a: mask8x32, dest: &mut [i8; 32usize]) -> () { - *dest = a.val.0; - } - #[inline(always)] - fn cvt_from_bytes_mask8x32(self, a: u8x32) -> mask8x32 { - unsafe { - mask8x32 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask8x32(self, a: mask8x32) -> u8x32 { - unsafe { - u8x32 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask8x32( - self, - a: mask8x32, - b: mask8x32, - ) -> mask8x32 { - let mut dest = [Default::default(); 32usize]; - dest[..32usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]); - dest[32usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]); - dest.simd_into(self) - } - #[inline(always)] - fn slide_within_blocks_mask8x32( - self, - a: mask8x32, - b: mask8x32, - ) -> mask8x32 { - let (a0, a1) = self.split_mask8x32(a); - let (b0, b1) = self.split_mask8x32(b); - self.combine_mask8x16( - self.slide_within_blocks_mask8x16::(a0, b0), - self.slide_within_blocks_mask8x16::(a1, b1), - ) - } - #[inline(always)] fn and_mask8x32(self, a: mask8x32, b: mask8x32) -> mask8x32 { let (a0, a1) = self.split_mask8x32(a); let (b0, b1) = self.split_mask8x32(b); @@ -5793,7 +5808,7 @@ impl Simd for Fallback { ) } #[inline(always)] - fn splat_mask16x16(self, val: i16) -> mask16x16 { + fn splat_mask16x16(self, val: bool) -> mask16x16 { let half = self.splat_mask16x8(val); self.combine_mask16x8(half, half) } @@ -5805,71 +5820,10 @@ impl Simd for Fallback { } } #[inline(always)] - fn load_array_ref_mask16x16(self, val: &[i16; 16usize]) -> mask16x16 { - mask16x16 { - val: crate::support::Aligned256(*val), - simd: self, - } - } - #[inline(always)] fn as_array_mask16x16(self, a: mask16x16) -> [i16; 16usize] { a.val.0 } #[inline(always)] - fn as_array_ref_mask16x16(self, a: &mask16x16) -> &[i16; 16usize] { - &a.val.0 - } - #[inline(always)] - fn as_array_mut_mask16x16(self, a: &mut mask16x16) -> &mut [i16; 16usize] { - &mut a.val.0 - } - #[inline(always)] - fn store_array_mask16x16(self, a: mask16x16, dest: &mut [i16; 16usize]) -> () { - *dest = a.val.0; - } - #[inline(always)] - fn cvt_from_bytes_mask16x16(self, a: u8x32) -> mask16x16 { - unsafe { - mask16x16 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask16x16(self, a: mask16x16) -> u8x32 { - unsafe { - u8x32 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask16x16( - self, - a: mask16x16, - b: mask16x16, - ) -> mask16x16 { - let mut dest = [Default::default(); 16usize]; - dest[..16usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]); - dest[16usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]); - dest.simd_into(self) - } - #[inline(always)] - fn slide_within_blocks_mask16x16( - self, - a: mask16x16, - b: mask16x16, - ) -> mask16x16 { - let (a0, a1) = self.split_mask16x16(a); - let (b0, b1) = self.split_mask16x16(b); - self.combine_mask16x8( - self.slide_within_blocks_mask16x8::(a0, b0), - self.slide_within_blocks_mask16x8::(a1, b1), - ) - } - #[inline(always)] fn and_mask16x16(self, a: mask16x16, b: mask16x16) -> mask16x16 { let (a0, a1) = self.split_mask16x16(a); let (b0, b1) = self.split_mask16x16(b); @@ -6482,7 +6436,7 @@ impl Simd for Fallback { self.combine_f32x4(self.cvt_f32_u32x4(a0), self.cvt_f32_u32x4(a1)) } #[inline(always)] - fn splat_mask32x8(self, val: i32) -> mask32x8 { + fn splat_mask32x8(self, val: bool) -> mask32x8 { let half = self.splat_mask32x4(val); self.combine_mask32x4(half, half) } @@ -6490,73 +6444,12 @@ impl Simd for Fallback { fn load_array_mask32x8(self, val: [i32; 8usize]) -> mask32x8 { mask32x8 { val: crate::support::Aligned256(val), - simd: self, - } - } - #[inline(always)] - fn load_array_ref_mask32x8(self, val: &[i32; 8usize]) -> mask32x8 { - mask32x8 { - val: crate::support::Aligned256(*val), - simd: self, - } - } - #[inline(always)] - fn as_array_mask32x8(self, a: mask32x8) -> [i32; 8usize] { - a.val.0 - } - #[inline(always)] - fn as_array_ref_mask32x8(self, a: &mask32x8) -> &[i32; 8usize] { - &a.val.0 - } - #[inline(always)] - fn as_array_mut_mask32x8(self, a: &mut mask32x8) -> &mut [i32; 8usize] { - &mut a.val.0 - } - #[inline(always)] - fn store_array_mask32x8(self, a: mask32x8, dest: &mut [i32; 8usize]) -> () { - *dest = a.val.0; - } - #[inline(always)] - fn cvt_from_bytes_mask32x8(self, a: u8x32) -> mask32x8 { - unsafe { - mask32x8 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask32x8(self, a: mask32x8) -> u8x32 { - unsafe { - u8x32 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask32x8( - self, - a: mask32x8, - b: mask32x8, - ) -> mask32x8 { - let mut dest = [Default::default(); 8usize]; - dest[..8usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]); - dest[8usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]); - dest.simd_into(self) - } - #[inline(always)] - fn slide_within_blocks_mask32x8( - self, - a: mask32x8, - b: mask32x8, - ) -> mask32x8 { - let (a0, a1) = self.split_mask32x8(a); - let (b0, b1) = self.split_mask32x8(b); - self.combine_mask32x4( - self.slide_within_blocks_mask32x4::(a0, b0), - self.slide_within_blocks_mask32x4::(a1, b1), - ) + simd: self, + } + } + #[inline(always)] + fn as_array_mask32x8(self, a: mask32x8) -> [i32; 8usize] { + a.val.0 } #[inline(always)] fn and_mask32x8(self, a: mask32x8, b: mask32x8) -> mask32x8 { @@ -6944,7 +6837,7 @@ impl Simd for Fallback { ) } #[inline(always)] - fn splat_mask64x4(self, val: i64) -> mask64x4 { + fn splat_mask64x4(self, val: bool) -> mask64x4 { let half = self.splat_mask64x2(val); self.combine_mask64x2(half, half) } @@ -6956,71 +6849,10 @@ impl Simd for Fallback { } } #[inline(always)] - fn load_array_ref_mask64x4(self, val: &[i64; 4usize]) -> mask64x4 { - mask64x4 { - val: crate::support::Aligned256(*val), - simd: self, - } - } - #[inline(always)] fn as_array_mask64x4(self, a: mask64x4) -> [i64; 4usize] { a.val.0 } #[inline(always)] - fn as_array_ref_mask64x4(self, a: &mask64x4) -> &[i64; 4usize] { - &a.val.0 - } - #[inline(always)] - fn as_array_mut_mask64x4(self, a: &mut mask64x4) -> &mut [i64; 4usize] { - &mut a.val.0 - } - #[inline(always)] - fn store_array_mask64x4(self, a: mask64x4, dest: &mut [i64; 4usize]) -> () { - *dest = a.val.0; - } - #[inline(always)] - fn cvt_from_bytes_mask64x4(self, a: u8x32) -> mask64x4 { - unsafe { - mask64x4 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask64x4(self, a: mask64x4) -> u8x32 { - unsafe { - u8x32 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask64x4( - self, - a: mask64x4, - b: mask64x4, - ) -> mask64x4 { - let mut dest = [Default::default(); 4usize]; - dest[..4usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]); - dest[4usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]); - dest.simd_into(self) - } - #[inline(always)] - fn slide_within_blocks_mask64x4( - self, - a: mask64x4, - b: mask64x4, - ) -> mask64x4 { - let (a0, a1) = self.split_mask64x4(a); - let (b0, b1) = self.split_mask64x4(b); - self.combine_mask64x2( - self.slide_within_blocks_mask64x2::(a0, b0), - self.slide_within_blocks_mask64x2::(a1, b1), - ) - } - #[inline(always)] fn and_mask64x4(self, a: mask64x4, b: mask64x4) -> mask64x4 { let (a0, a1) = self.split_mask64x4(a); let (b0, b1) = self.split_mask64x4(b); @@ -8073,7 +7905,7 @@ impl Simd for Fallback { ) } #[inline(always)] - fn splat_mask8x64(self, val: i8) -> mask8x64 { + fn splat_mask8x64(self, val: bool) -> mask8x64 { let half = self.splat_mask8x32(val); self.combine_mask8x32(half, half) } @@ -8085,71 +7917,10 @@ impl Simd for Fallback { } } #[inline(always)] - fn load_array_ref_mask8x64(self, val: &[i8; 64usize]) -> mask8x64 { - mask8x64 { - val: crate::support::Aligned512(*val), - simd: self, - } - } - #[inline(always)] fn as_array_mask8x64(self, a: mask8x64) -> [i8; 64usize] { a.val.0 } #[inline(always)] - fn as_array_ref_mask8x64(self, a: &mask8x64) -> &[i8; 64usize] { - &a.val.0 - } - #[inline(always)] - fn as_array_mut_mask8x64(self, a: &mut mask8x64) -> &mut [i8; 64usize] { - &mut a.val.0 - } - #[inline(always)] - fn store_array_mask8x64(self, a: mask8x64, dest: &mut [i8; 64usize]) -> () { - *dest = a.val.0; - } - #[inline(always)] - fn cvt_from_bytes_mask8x64(self, a: u8x64) -> mask8x64 { - unsafe { - mask8x64 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask8x64(self, a: mask8x64) -> u8x64 { - unsafe { - u8x64 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask8x64( - self, - a: mask8x64, - b: mask8x64, - ) -> mask8x64 { - let mut dest = [Default::default(); 64usize]; - dest[..64usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]); - dest[64usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]); - dest.simd_into(self) - } - #[inline(always)] - fn slide_within_blocks_mask8x64( - self, - a: mask8x64, - b: mask8x64, - ) -> mask8x64 { - let (a0, a1) = self.split_mask8x64(a); - let (b0, b1) = self.split_mask8x64(b); - self.combine_mask8x32( - self.slide_within_blocks_mask8x32::(a0, b0), - self.slide_within_blocks_mask8x32::(a1, b1), - ) - } - #[inline(always)] fn and_mask8x64(self, a: mask8x64, b: mask8x64) -> mask8x64 { let (a0, a1) = self.split_mask8x64(a); let (b0, b1) = self.split_mask8x64(b); @@ -8810,7 +8581,7 @@ impl Simd for Fallback { ) } #[inline(always)] - fn splat_mask16x32(self, val: i16) -> mask16x32 { + fn splat_mask16x32(self, val: bool) -> mask16x32 { let half = self.splat_mask16x16(val); self.combine_mask16x16(half, half) } @@ -8822,71 +8593,10 @@ impl Simd for Fallback { } } #[inline(always)] - fn load_array_ref_mask16x32(self, val: &[i16; 32usize]) -> mask16x32 { - mask16x32 { - val: crate::support::Aligned512(*val), - simd: self, - } - } - #[inline(always)] fn as_array_mask16x32(self, a: mask16x32) -> [i16; 32usize] { a.val.0 } #[inline(always)] - fn as_array_ref_mask16x32(self, a: &mask16x32) -> &[i16; 32usize] { - &a.val.0 - } - #[inline(always)] - fn as_array_mut_mask16x32(self, a: &mut mask16x32) -> &mut [i16; 32usize] { - &mut a.val.0 - } - #[inline(always)] - fn store_array_mask16x32(self, a: mask16x32, dest: &mut [i16; 32usize]) -> () { - *dest = a.val.0; - } - #[inline(always)] - fn cvt_from_bytes_mask16x32(self, a: u8x64) -> mask16x32 { - unsafe { - mask16x32 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask16x32(self, a: mask16x32) -> u8x64 { - unsafe { - u8x64 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask16x32( - self, - a: mask16x32, - b: mask16x32, - ) -> mask16x32 { - let mut dest = [Default::default(); 32usize]; - dest[..32usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]); - dest[32usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]); - dest.simd_into(self) - } - #[inline(always)] - fn slide_within_blocks_mask16x32( - self, - a: mask16x32, - b: mask16x32, - ) -> mask16x32 { - let (a0, a1) = self.split_mask16x32(a); - let (b0, b1) = self.split_mask16x32(b); - self.combine_mask16x16( - self.slide_within_blocks_mask16x16::(a0, b0), - self.slide_within_blocks_mask16x16::(a1, b1), - ) - } - #[inline(always)] fn and_mask16x32(self, a: mask16x32, b: mask16x32) -> mask16x32 { let (a0, a1) = self.split_mask16x32(a); let (b0, b1) = self.split_mask16x32(b); @@ -9511,7 +9221,7 @@ impl Simd for Fallback { self.combine_f32x8(self.cvt_f32_u32x8(a0), self.cvt_f32_u32x8(a1)) } #[inline(always)] - fn splat_mask32x16(self, val: i32) -> mask32x16 { + fn splat_mask32x16(self, val: bool) -> mask32x16 { let half = self.splat_mask32x8(val); self.combine_mask32x8(half, half) } @@ -9523,71 +9233,10 @@ impl Simd for Fallback { } } #[inline(always)] - fn load_array_ref_mask32x16(self, val: &[i32; 16usize]) -> mask32x16 { - mask32x16 { - val: crate::support::Aligned512(*val), - simd: self, - } - } - #[inline(always)] fn as_array_mask32x16(self, a: mask32x16) -> [i32; 16usize] { a.val.0 } #[inline(always)] - fn as_array_ref_mask32x16(self, a: &mask32x16) -> &[i32; 16usize] { - &a.val.0 - } - #[inline(always)] - fn as_array_mut_mask32x16(self, a: &mut mask32x16) -> &mut [i32; 16usize] { - &mut a.val.0 - } - #[inline(always)] - fn store_array_mask32x16(self, a: mask32x16, dest: &mut [i32; 16usize]) -> () { - *dest = a.val.0; - } - #[inline(always)] - fn cvt_from_bytes_mask32x16(self, a: u8x64) -> mask32x16 { - unsafe { - mask32x16 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask32x16(self, a: mask32x16) -> u8x64 { - unsafe { - u8x64 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask32x16( - self, - a: mask32x16, - b: mask32x16, - ) -> mask32x16 { - let mut dest = [Default::default(); 16usize]; - dest[..16usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]); - dest[16usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]); - dest.simd_into(self) - } - #[inline(always)] - fn slide_within_blocks_mask32x16( - self, - a: mask32x16, - b: mask32x16, - ) -> mask32x16 { - let (a0, a1) = self.split_mask32x16(a); - let (b0, b1) = self.split_mask32x16(b); - self.combine_mask32x8( - self.slide_within_blocks_mask32x8::(a0, b0), - self.slide_within_blocks_mask32x8::(a1, b1), - ) - } - #[inline(always)] fn and_mask32x16(self, a: mask32x16, b: mask32x16) -> mask32x16 { let (a0, a1) = self.split_mask32x16(a); let (b0, b1) = self.split_mask32x16(b); @@ -9959,7 +9608,7 @@ impl Simd for Fallback { ) } #[inline(always)] - fn splat_mask64x8(self, val: i64) -> mask64x8 { + fn splat_mask64x8(self, val: bool) -> mask64x8 { let half = self.splat_mask64x4(val); self.combine_mask64x4(half, half) } @@ -9971,71 +9620,10 @@ impl Simd for Fallback { } } #[inline(always)] - fn load_array_ref_mask64x8(self, val: &[i64; 8usize]) -> mask64x8 { - mask64x8 { - val: crate::support::Aligned512(*val), - simd: self, - } - } - #[inline(always)] fn as_array_mask64x8(self, a: mask64x8) -> [i64; 8usize] { a.val.0 } #[inline(always)] - fn as_array_ref_mask64x8(self, a: &mask64x8) -> &[i64; 8usize] { - &a.val.0 - } - #[inline(always)] - fn as_array_mut_mask64x8(self, a: &mut mask64x8) -> &mut [i64; 8usize] { - &mut a.val.0 - } - #[inline(always)] - fn store_array_mask64x8(self, a: mask64x8, dest: &mut [i64; 8usize]) -> () { - *dest = a.val.0; - } - #[inline(always)] - fn cvt_from_bytes_mask64x8(self, a: u8x64) -> mask64x8 { - unsafe { - mask64x8 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask64x8(self, a: mask64x8) -> u8x64 { - unsafe { - u8x64 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask64x8( - self, - a: mask64x8, - b: mask64x8, - ) -> mask64x8 { - let mut dest = [Default::default(); 8usize]; - dest[..8usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]); - dest[8usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]); - dest.simd_into(self) - } - #[inline(always)] - fn slide_within_blocks_mask64x8( - self, - a: mask64x8, - b: mask64x8, - ) -> mask64x8 { - let (a0, a1) = self.split_mask64x8(a); - let (b0, b1) = self.split_mask64x8(b); - self.combine_mask64x4( - self.slide_within_blocks_mask64x4::(a0, b0), - self.slide_within_blocks_mask64x4::(a1, b1), - ) - } - #[inline(always)] fn and_mask64x8(self, a: mask64x8, b: mask64x8) -> mask64x8 { let (a0, a1) = self.split_mask64x8(a); let (b0, b1) = self.split_mask64x8(b); diff --git a/fearless_simd/src/generated/neon.rs b/fearless_simd/src/generated/neon.rs index d6d38db4..ad46f2f5 100644 --- a/fearless_simd/src/generated/neon.rs +++ b/fearless_simd/src/generated/neon.rs @@ -768,8 +768,11 @@ impl Simd for Neon { unsafe { vreinterpretq_u32_u8(a.into()).simd_into(self) } } #[inline(always)] - fn splat_mask8x16(self, val: i8) -> mask8x16 { - unsafe { vdupq_n_s8(val).simd_into(self) } + fn splat_mask8x16(self, val: bool) -> mask8x16 { + unsafe { + let val: i8 = if val { !0 } else { 0 }; + vdupq_n_s8(val).simd_into(self) + } } #[inline(always)] fn load_array_mask8x16(self, val: [i8; 16usize]) -> mask8x16 { @@ -779,82 +782,10 @@ impl Simd for Neon { } } #[inline(always)] - fn load_array_ref_mask8x16(self, val: &[i8; 16usize]) -> mask8x16 { - mask8x16 { - val: unsafe { core::mem::transmute_copy(val) }, - simd: self, - } - } - #[inline(always)] fn as_array_mask8x16(self, a: mask8x16) -> [i8; 16usize] { unsafe { core::mem::transmute::(a.val.0) } } #[inline(always)] - fn as_array_ref_mask8x16(self, a: &mask8x16) -> &[i8; 16usize] { - unsafe { core::mem::transmute::<&int8x16_t, &[i8; 16usize]>(&a.val.0) } - } - #[inline(always)] - fn as_array_mut_mask8x16(self, a: &mut mask8x16) -> &mut [i8; 16usize] { - unsafe { core::mem::transmute::<&mut int8x16_t, &mut [i8; 16usize]>(&mut a.val.0) } - } - #[inline(always)] - fn store_array_mask8x16(self, a: mask8x16, dest: &mut [i8; 16usize]) -> () { - unsafe { - core::ptr::copy_nonoverlapping( - (&raw const a.val.0) as *const i8, - dest.as_mut_ptr(), - 16usize, - ); - } - } - #[inline(always)] - fn cvt_from_bytes_mask8x16(self, a: u8x16) -> mask8x16 { - unsafe { - mask8x16 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask8x16(self, a: mask8x16) -> u8x16 { - unsafe { - u8x16 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask8x16( - self, - a: mask8x16, - b: mask8x16, - ) -> mask8x16 { - if SHIFT >= 16usize { - return b; - } - let result = unsafe { - dyn_vext_128( - self.cvt_to_bytes_mask8x16(a).val.0, - self.cvt_to_bytes_mask8x16(b).val.0, - SHIFT, - ) - }; - self.cvt_from_bytes_mask8x16(u8x16 { - val: crate::support::Aligned128(result), - simd: self, - }) - } - #[inline(always)] - fn slide_within_blocks_mask8x16( - self, - a: mask8x16, - b: mask8x16, - ) -> mask8x16 { - self.slide_mask8x16::(a, b) - } - #[inline(always)] fn and_mask8x16(self, a: mask8x16, b: mask8x16) -> mask8x16 { unsafe { vandq_s8(a.into(), b.into()).simd_into(self) } } @@ -1323,8 +1254,11 @@ impl Simd for Neon { unsafe { vreinterpretq_u32_u16(a.into()).simd_into(self) } } #[inline(always)] - fn splat_mask16x8(self, val: i16) -> mask16x8 { - unsafe { vdupq_n_s16(val).simd_into(self) } + fn splat_mask16x8(self, val: bool) -> mask16x8 { + unsafe { + let val: i16 = if val { !0 } else { 0 }; + vdupq_n_s16(val).simd_into(self) + } } #[inline(always)] fn load_array_mask16x8(self, val: [i16; 8usize]) -> mask16x8 { @@ -1334,82 +1268,10 @@ impl Simd for Neon { } } #[inline(always)] - fn load_array_ref_mask16x8(self, val: &[i16; 8usize]) -> mask16x8 { - mask16x8 { - val: unsafe { core::mem::transmute_copy(val) }, - simd: self, - } - } - #[inline(always)] fn as_array_mask16x8(self, a: mask16x8) -> [i16; 8usize] { unsafe { core::mem::transmute::(a.val.0) } } #[inline(always)] - fn as_array_ref_mask16x8(self, a: &mask16x8) -> &[i16; 8usize] { - unsafe { core::mem::transmute::<&int16x8_t, &[i16; 8usize]>(&a.val.0) } - } - #[inline(always)] - fn as_array_mut_mask16x8(self, a: &mut mask16x8) -> &mut [i16; 8usize] { - unsafe { core::mem::transmute::<&mut int16x8_t, &mut [i16; 8usize]>(&mut a.val.0) } - } - #[inline(always)] - fn store_array_mask16x8(self, a: mask16x8, dest: &mut [i16; 8usize]) -> () { - unsafe { - core::ptr::copy_nonoverlapping( - (&raw const a.val.0) as *const i16, - dest.as_mut_ptr(), - 8usize, - ); - } - } - #[inline(always)] - fn cvt_from_bytes_mask16x8(self, a: u8x16) -> mask16x8 { - unsafe { - mask16x8 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask16x8(self, a: mask16x8) -> u8x16 { - unsafe { - u8x16 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask16x8( - self, - a: mask16x8, - b: mask16x8, - ) -> mask16x8 { - if SHIFT >= 8usize { - return b; - } - let result = unsafe { - dyn_vext_128( - self.cvt_to_bytes_mask16x8(a).val.0, - self.cvt_to_bytes_mask16x8(b).val.0, - SHIFT * 2usize, - ) - }; - self.cvt_from_bytes_mask16x8(u8x16 { - val: crate::support::Aligned128(result), - simd: self, - }) - } - #[inline(always)] - fn slide_within_blocks_mask16x8( - self, - a: mask16x8, - b: mask16x8, - ) -> mask16x8 { - self.slide_mask16x8::(a, b) - } - #[inline(always)] fn and_mask16x8(self, a: mask16x8, b: mask16x8) -> mask16x8 { unsafe { vandq_s16(a.into(), b.into()).simd_into(self) } } @@ -1882,8 +1744,11 @@ impl Simd for Neon { unsafe { vcvtq_f32_u32(a.into()).simd_into(self) } } #[inline(always)] - fn splat_mask32x4(self, val: i32) -> mask32x4 { - unsafe { vdupq_n_s32(val).simd_into(self) } + fn splat_mask32x4(self, val: bool) -> mask32x4 { + unsafe { + let val: i32 = if val { !0 } else { 0 }; + vdupq_n_s32(val).simd_into(self) + } } #[inline(always)] fn load_array_mask32x4(self, val: [i32; 4usize]) -> mask32x4 { @@ -1893,82 +1758,10 @@ impl Simd for Neon { } } #[inline(always)] - fn load_array_ref_mask32x4(self, val: &[i32; 4usize]) -> mask32x4 { - mask32x4 { - val: unsafe { core::mem::transmute_copy(val) }, - simd: self, - } - } - #[inline(always)] fn as_array_mask32x4(self, a: mask32x4) -> [i32; 4usize] { unsafe { core::mem::transmute::(a.val.0) } } #[inline(always)] - fn as_array_ref_mask32x4(self, a: &mask32x4) -> &[i32; 4usize] { - unsafe { core::mem::transmute::<&int32x4_t, &[i32; 4usize]>(&a.val.0) } - } - #[inline(always)] - fn as_array_mut_mask32x4(self, a: &mut mask32x4) -> &mut [i32; 4usize] { - unsafe { core::mem::transmute::<&mut int32x4_t, &mut [i32; 4usize]>(&mut a.val.0) } - } - #[inline(always)] - fn store_array_mask32x4(self, a: mask32x4, dest: &mut [i32; 4usize]) -> () { - unsafe { - core::ptr::copy_nonoverlapping( - (&raw const a.val.0) as *const i32, - dest.as_mut_ptr(), - 4usize, - ); - } - } - #[inline(always)] - fn cvt_from_bytes_mask32x4(self, a: u8x16) -> mask32x4 { - unsafe { - mask32x4 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask32x4(self, a: mask32x4) -> u8x16 { - unsafe { - u8x16 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask32x4( - self, - a: mask32x4, - b: mask32x4, - ) -> mask32x4 { - if SHIFT >= 4usize { - return b; - } - let result = unsafe { - dyn_vext_128( - self.cvt_to_bytes_mask32x4(a).val.0, - self.cvt_to_bytes_mask32x4(b).val.0, - SHIFT * 4usize, - ) - }; - self.cvt_from_bytes_mask32x4(u8x16 { - val: crate::support::Aligned128(result), - simd: self, - }) - } - #[inline(always)] - fn slide_within_blocks_mask32x4( - self, - a: mask32x4, - b: mask32x4, - ) -> mask32x4 { - self.slide_mask32x4::(a, b) - } - #[inline(always)] fn and_mask32x4(self, a: mask32x4, b: mask32x4) -> mask32x4 { unsafe { vandq_s32(a.into(), b.into()).simd_into(self) } } @@ -2254,8 +2047,11 @@ impl Simd for Neon { unsafe { vreinterpretq_f32_f64(a.into()).simd_into(self) } } #[inline(always)] - fn splat_mask64x2(self, val: i64) -> mask64x2 { - unsafe { vdupq_n_s64(val).simd_into(self) } + fn splat_mask64x2(self, val: bool) -> mask64x2 { + unsafe { + let val: i64 = if val { !0 } else { 0 }; + vdupq_n_s64(val).simd_into(self) + } } #[inline(always)] fn load_array_mask64x2(self, val: [i64; 2usize]) -> mask64x2 { @@ -2265,82 +2061,10 @@ impl Simd for Neon { } } #[inline(always)] - fn load_array_ref_mask64x2(self, val: &[i64; 2usize]) -> mask64x2 { - mask64x2 { - val: unsafe { core::mem::transmute_copy(val) }, - simd: self, - } - } - #[inline(always)] fn as_array_mask64x2(self, a: mask64x2) -> [i64; 2usize] { unsafe { core::mem::transmute::(a.val.0) } } #[inline(always)] - fn as_array_ref_mask64x2(self, a: &mask64x2) -> &[i64; 2usize] { - unsafe { core::mem::transmute::<&int64x2_t, &[i64; 2usize]>(&a.val.0) } - } - #[inline(always)] - fn as_array_mut_mask64x2(self, a: &mut mask64x2) -> &mut [i64; 2usize] { - unsafe { core::mem::transmute::<&mut int64x2_t, &mut [i64; 2usize]>(&mut a.val.0) } - } - #[inline(always)] - fn store_array_mask64x2(self, a: mask64x2, dest: &mut [i64; 2usize]) -> () { - unsafe { - core::ptr::copy_nonoverlapping( - (&raw const a.val.0) as *const i64, - dest.as_mut_ptr(), - 2usize, - ); - } - } - #[inline(always)] - fn cvt_from_bytes_mask64x2(self, a: u8x16) -> mask64x2 { - unsafe { - mask64x2 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask64x2(self, a: mask64x2) -> u8x16 { - unsafe { - u8x16 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask64x2( - self, - a: mask64x2, - b: mask64x2, - ) -> mask64x2 { - if SHIFT >= 2usize { - return b; - } - let result = unsafe { - dyn_vext_128( - self.cvt_to_bytes_mask64x2(a).val.0, - self.cvt_to_bytes_mask64x2(b).val.0, - SHIFT * 8usize, - ) - }; - self.cvt_from_bytes_mask64x2(u8x16 { - val: crate::support::Aligned128(result), - simd: self, - }) - } - #[inline(always)] - fn slide_within_blocks_mask64x2( - self, - a: mask64x2, - b: mask64x2, - ) -> mask64x2 { - self.slide_mask64x2::(a, b) - } - #[inline(always)] fn and_mask64x2(self, a: mask64x2, b: mask64x2) -> mask64x2 { unsafe { vandq_s64(a.into(), b.into()).simd_into(self) } } @@ -3406,7 +3130,7 @@ impl Simd for Neon { ) } #[inline(always)] - fn splat_mask8x32(self, val: i8) -> mask8x32 { + fn splat_mask8x32(self, val: bool) -> mask8x32 { let half = self.splat_mask8x16(val); self.combine_mask8x16(half, half) } @@ -3418,107 +3142,10 @@ impl Simd for Neon { } } #[inline(always)] - fn load_array_ref_mask8x32(self, val: &[i8; 32usize]) -> mask8x32 { - mask8x32 { - val: unsafe { core::mem::transmute_copy(val) }, - simd: self, - } - } - #[inline(always)] fn as_array_mask8x32(self, a: mask8x32) -> [i8; 32usize] { unsafe { core::mem::transmute::(a.val.0) } } #[inline(always)] - fn as_array_ref_mask8x32(self, a: &mask8x32) -> &[i8; 32usize] { - unsafe { core::mem::transmute::<&int8x16x2_t, &[i8; 32usize]>(&a.val.0) } - } - #[inline(always)] - fn as_array_mut_mask8x32(self, a: &mut mask8x32) -> &mut [i8; 32usize] { - unsafe { core::mem::transmute::<&mut int8x16x2_t, &mut [i8; 32usize]>(&mut a.val.0) } - } - #[inline(always)] - fn store_array_mask8x32(self, a: mask8x32, dest: &mut [i8; 32usize]) -> () { - unsafe { - core::ptr::copy_nonoverlapping( - (&raw const a.val.0) as *const i8, - dest.as_mut_ptr(), - 32usize, - ); - } - } - #[inline(always)] - fn cvt_from_bytes_mask8x32(self, a: u8x32) -> mask8x32 { - unsafe { - mask8x32 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask8x32(self, a: mask8x32) -> u8x32 { - unsafe { - u8x32 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask8x32( - self, - a: mask8x32, - b: mask8x32, - ) -> mask8x32 { - if SHIFT >= 32usize { - return b; - } - let result = unsafe { - let a_bytes = self.cvt_to_bytes_mask8x32(a).val.0; - let b_bytes = self.cvt_to_bytes_mask8x32(b).val.0; - let a_blocks = [a_bytes.0, a_bytes.1]; - let b_blocks = [b_bytes.0, b_bytes.1]; - let shift_bytes = SHIFT; - uint8x16x2_t( - { - let [lo, hi] = crate::support::cross_block_slide_blocks_at( - &a_blocks, - &b_blocks, - 0, - shift_bytes, - ); - dyn_vext_128(lo, hi, shift_bytes % 16) - }, - { - let [lo, hi] = crate::support::cross_block_slide_blocks_at( - &a_blocks, - &b_blocks, - 1, - shift_bytes, - ); - dyn_vext_128(lo, hi, shift_bytes % 16) - }, - ) - }; - self.cvt_from_bytes_mask8x32(u8x32 { - val: crate::support::Aligned256(result), - simd: self, - }) - } - #[inline(always)] - fn slide_within_blocks_mask8x32( - self, - a: mask8x32, - b: mask8x32, - ) -> mask8x32 { - let (a0, a1) = self.split_mask8x32(a); - let (b0, b1) = self.split_mask8x32(b); - self.combine_mask8x16( - self.slide_within_blocks_mask8x16::(a0, b0), - self.slide_within_blocks_mask8x16::(a1, b1), - ) - } - #[inline(always)] fn and_mask8x32(self, a: mask8x32, b: mask8x32) -> mask8x32 { let (a0, a1) = self.split_mask8x32(a); let (b0, b1) = self.split_mask8x32(b); @@ -4231,7 +3858,7 @@ impl Simd for Neon { ) } #[inline(always)] - fn splat_mask16x16(self, val: i16) -> mask16x16 { + fn splat_mask16x16(self, val: bool) -> mask16x16 { let half = self.splat_mask16x8(val); self.combine_mask16x8(half, half) } @@ -4243,107 +3870,10 @@ impl Simd for Neon { } } #[inline(always)] - fn load_array_ref_mask16x16(self, val: &[i16; 16usize]) -> mask16x16 { - mask16x16 { - val: unsafe { core::mem::transmute_copy(val) }, - simd: self, - } - } - #[inline(always)] fn as_array_mask16x16(self, a: mask16x16) -> [i16; 16usize] { unsafe { core::mem::transmute::(a.val.0) } } #[inline(always)] - fn as_array_ref_mask16x16(self, a: &mask16x16) -> &[i16; 16usize] { - unsafe { core::mem::transmute::<&int16x8x2_t, &[i16; 16usize]>(&a.val.0) } - } - #[inline(always)] - fn as_array_mut_mask16x16(self, a: &mut mask16x16) -> &mut [i16; 16usize] { - unsafe { core::mem::transmute::<&mut int16x8x2_t, &mut [i16; 16usize]>(&mut a.val.0) } - } - #[inline(always)] - fn store_array_mask16x16(self, a: mask16x16, dest: &mut [i16; 16usize]) -> () { - unsafe { - core::ptr::copy_nonoverlapping( - (&raw const a.val.0) as *const i16, - dest.as_mut_ptr(), - 16usize, - ); - } - } - #[inline(always)] - fn cvt_from_bytes_mask16x16(self, a: u8x32) -> mask16x16 { - unsafe { - mask16x16 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask16x16(self, a: mask16x16) -> u8x32 { - unsafe { - u8x32 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask16x16( - self, - a: mask16x16, - b: mask16x16, - ) -> mask16x16 { - if SHIFT >= 16usize { - return b; - } - let result = unsafe { - let a_bytes = self.cvt_to_bytes_mask16x16(a).val.0; - let b_bytes = self.cvt_to_bytes_mask16x16(b).val.0; - let a_blocks = [a_bytes.0, a_bytes.1]; - let b_blocks = [b_bytes.0, b_bytes.1]; - let shift_bytes = SHIFT * 2usize; - uint8x16x2_t( - { - let [lo, hi] = crate::support::cross_block_slide_blocks_at( - &a_blocks, - &b_blocks, - 0, - shift_bytes, - ); - dyn_vext_128(lo, hi, shift_bytes % 16) - }, - { - let [lo, hi] = crate::support::cross_block_slide_blocks_at( - &a_blocks, - &b_blocks, - 1, - shift_bytes, - ); - dyn_vext_128(lo, hi, shift_bytes % 16) - }, - ) - }; - self.cvt_from_bytes_mask16x16(u8x32 { - val: crate::support::Aligned256(result), - simd: self, - }) - } - #[inline(always)] - fn slide_within_blocks_mask16x16( - self, - a: mask16x16, - b: mask16x16, - ) -> mask16x16 { - let (a0, a1) = self.split_mask16x16(a); - let (b0, b1) = self.split_mask16x16(b); - self.combine_mask16x8( - self.slide_within_blocks_mask16x8::(a0, b0), - self.slide_within_blocks_mask16x8::(a1, b1), - ) - } - #[inline(always)] fn and_mask16x16(self, a: mask16x16, b: mask16x16) -> mask16x16 { let (a0, a1) = self.split_mask16x16(a); let (b0, b1) = self.split_mask16x16(b); @@ -5049,7 +4579,7 @@ impl Simd for Neon { self.combine_f32x4(self.cvt_f32_u32x4(a0), self.cvt_f32_u32x4(a1)) } #[inline(always)] - fn splat_mask32x8(self, val: i32) -> mask32x8 { + fn splat_mask32x8(self, val: bool) -> mask32x8 { let half = self.splat_mask32x4(val); self.combine_mask32x4(half, half) } @@ -5061,107 +4591,10 @@ impl Simd for Neon { } } #[inline(always)] - fn load_array_ref_mask32x8(self, val: &[i32; 8usize]) -> mask32x8 { - mask32x8 { - val: unsafe { core::mem::transmute_copy(val) }, - simd: self, - } - } - #[inline(always)] fn as_array_mask32x8(self, a: mask32x8) -> [i32; 8usize] { unsafe { core::mem::transmute::(a.val.0) } } #[inline(always)] - fn as_array_ref_mask32x8(self, a: &mask32x8) -> &[i32; 8usize] { - unsafe { core::mem::transmute::<&int32x4x2_t, &[i32; 8usize]>(&a.val.0) } - } - #[inline(always)] - fn as_array_mut_mask32x8(self, a: &mut mask32x8) -> &mut [i32; 8usize] { - unsafe { core::mem::transmute::<&mut int32x4x2_t, &mut [i32; 8usize]>(&mut a.val.0) } - } - #[inline(always)] - fn store_array_mask32x8(self, a: mask32x8, dest: &mut [i32; 8usize]) -> () { - unsafe { - core::ptr::copy_nonoverlapping( - (&raw const a.val.0) as *const i32, - dest.as_mut_ptr(), - 8usize, - ); - } - } - #[inline(always)] - fn cvt_from_bytes_mask32x8(self, a: u8x32) -> mask32x8 { - unsafe { - mask32x8 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask32x8(self, a: mask32x8) -> u8x32 { - unsafe { - u8x32 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask32x8( - self, - a: mask32x8, - b: mask32x8, - ) -> mask32x8 { - if SHIFT >= 8usize { - return b; - } - let result = unsafe { - let a_bytes = self.cvt_to_bytes_mask32x8(a).val.0; - let b_bytes = self.cvt_to_bytes_mask32x8(b).val.0; - let a_blocks = [a_bytes.0, a_bytes.1]; - let b_blocks = [b_bytes.0, b_bytes.1]; - let shift_bytes = SHIFT * 4usize; - uint8x16x2_t( - { - let [lo, hi] = crate::support::cross_block_slide_blocks_at( - &a_blocks, - &b_blocks, - 0, - shift_bytes, - ); - dyn_vext_128(lo, hi, shift_bytes % 16) - }, - { - let [lo, hi] = crate::support::cross_block_slide_blocks_at( - &a_blocks, - &b_blocks, - 1, - shift_bytes, - ); - dyn_vext_128(lo, hi, shift_bytes % 16) - }, - ) - }; - self.cvt_from_bytes_mask32x8(u8x32 { - val: crate::support::Aligned256(result), - simd: self, - }) - } - #[inline(always)] - fn slide_within_blocks_mask32x8( - self, - a: mask32x8, - b: mask32x8, - ) -> mask32x8 { - let (a0, a1) = self.split_mask32x8(a); - let (b0, b1) = self.split_mask32x8(b); - self.combine_mask32x4( - self.slide_within_blocks_mask32x4::(a0, b0), - self.slide_within_blocks_mask32x4::(a1, b1), - ) - } - #[inline(always)] fn and_mask32x8(self, a: mask32x8, b: mask32x8) -> mask32x8 { let (a0, a1) = self.split_mask32x8(a); let (b0, b1) = self.split_mask32x8(b); @@ -5597,7 +5030,7 @@ impl Simd for Neon { ) } #[inline(always)] - fn splat_mask64x4(self, val: i64) -> mask64x4 { + fn splat_mask64x4(self, val: bool) -> mask64x4 { let half = self.splat_mask64x2(val); self.combine_mask64x2(half, half) } @@ -5609,111 +5042,14 @@ impl Simd for Neon { } } #[inline(always)] - fn load_array_ref_mask64x4(self, val: &[i64; 4usize]) -> mask64x4 { - mask64x4 { - val: unsafe { core::mem::transmute_copy(val) }, - simd: self, - } - } - #[inline(always)] fn as_array_mask64x4(self, a: mask64x4) -> [i64; 4usize] { unsafe { core::mem::transmute::(a.val.0) } } #[inline(always)] - fn as_array_ref_mask64x4(self, a: &mask64x4) -> &[i64; 4usize] { - unsafe { core::mem::transmute::<&int64x2x2_t, &[i64; 4usize]>(&a.val.0) } - } - #[inline(always)] - fn as_array_mut_mask64x4(self, a: &mut mask64x4) -> &mut [i64; 4usize] { - unsafe { core::mem::transmute::<&mut int64x2x2_t, &mut [i64; 4usize]>(&mut a.val.0) } - } - #[inline(always)] - fn store_array_mask64x4(self, a: mask64x4, dest: &mut [i64; 4usize]) -> () { - unsafe { - core::ptr::copy_nonoverlapping( - (&raw const a.val.0) as *const i64, - dest.as_mut_ptr(), - 4usize, - ); - } - } - #[inline(always)] - fn cvt_from_bytes_mask64x4(self, a: u8x32) -> mask64x4 { - unsafe { - mask64x4 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask64x4(self, a: mask64x4) -> u8x32 { - unsafe { - u8x32 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask64x4( - self, - a: mask64x4, - b: mask64x4, - ) -> mask64x4 { - if SHIFT >= 4usize { - return b; - } - let result = unsafe { - let a_bytes = self.cvt_to_bytes_mask64x4(a).val.0; - let b_bytes = self.cvt_to_bytes_mask64x4(b).val.0; - let a_blocks = [a_bytes.0, a_bytes.1]; - let b_blocks = [b_bytes.0, b_bytes.1]; - let shift_bytes = SHIFT * 8usize; - uint8x16x2_t( - { - let [lo, hi] = crate::support::cross_block_slide_blocks_at( - &a_blocks, - &b_blocks, - 0, - shift_bytes, - ); - dyn_vext_128(lo, hi, shift_bytes % 16) - }, - { - let [lo, hi] = crate::support::cross_block_slide_blocks_at( - &a_blocks, - &b_blocks, - 1, - shift_bytes, - ); - dyn_vext_128(lo, hi, shift_bytes % 16) - }, - ) - }; - self.cvt_from_bytes_mask64x4(u8x32 { - val: crate::support::Aligned256(result), - simd: self, - }) - } - #[inline(always)] - fn slide_within_blocks_mask64x4( - self, - a: mask64x4, - b: mask64x4, - ) -> mask64x4 { - let (a0, a1) = self.split_mask64x4(a); - let (b0, b1) = self.split_mask64x4(b); - self.combine_mask64x2( - self.slide_within_blocks_mask64x2::(a0, b0), - self.slide_within_blocks_mask64x2::(a1, b1), - ) - } - #[inline(always)] - fn and_mask64x4(self, a: mask64x4, b: mask64x4) -> mask64x4 { - let (a0, a1) = self.split_mask64x4(a); - let (b0, b1) = self.split_mask64x4(b); - self.combine_mask64x2(self.and_mask64x2(a0, b0), self.and_mask64x2(a1, b1)) + fn and_mask64x4(self, a: mask64x4, b: mask64x4) -> mask64x4 { + let (a0, a1) = self.split_mask64x4(a); + let (b0, b1) = self.split_mask64x4(b); + self.combine_mask64x2(self.and_mask64x2(a0, b0), self.and_mask64x2(a1, b1)) } #[inline(always)] fn or_mask64x4(self, a: mask64x4, b: mask64x4) -> mask64x4 { @@ -6847,7 +6183,7 @@ impl Simd for Neon { ) } #[inline(always)] - fn splat_mask8x64(self, val: i8) -> mask8x64 { + fn splat_mask8x64(self, val: bool) -> mask8x64 { let half = self.splat_mask8x32(val); self.combine_mask8x32(half, half) } @@ -6859,125 +6195,10 @@ impl Simd for Neon { } } #[inline(always)] - fn load_array_ref_mask8x64(self, val: &[i8; 64usize]) -> mask8x64 { - mask8x64 { - val: unsafe { core::mem::transmute_copy(val) }, - simd: self, - } - } - #[inline(always)] fn as_array_mask8x64(self, a: mask8x64) -> [i8; 64usize] { unsafe { core::mem::transmute::(a.val.0) } } #[inline(always)] - fn as_array_ref_mask8x64(self, a: &mask8x64) -> &[i8; 64usize] { - unsafe { core::mem::transmute::<&int8x16x4_t, &[i8; 64usize]>(&a.val.0) } - } - #[inline(always)] - fn as_array_mut_mask8x64(self, a: &mut mask8x64) -> &mut [i8; 64usize] { - unsafe { core::mem::transmute::<&mut int8x16x4_t, &mut [i8; 64usize]>(&mut a.val.0) } - } - #[inline(always)] - fn store_array_mask8x64(self, a: mask8x64, dest: &mut [i8; 64usize]) -> () { - unsafe { - core::ptr::copy_nonoverlapping( - (&raw const a.val.0) as *const i8, - dest.as_mut_ptr(), - 64usize, - ); - } - } - #[inline(always)] - fn cvt_from_bytes_mask8x64(self, a: u8x64) -> mask8x64 { - unsafe { - mask8x64 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask8x64(self, a: mask8x64) -> u8x64 { - unsafe { - u8x64 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask8x64( - self, - a: mask8x64, - b: mask8x64, - ) -> mask8x64 { - if SHIFT >= 64usize { - return b; - } - let result = unsafe { - let a_bytes = self.cvt_to_bytes_mask8x64(a).val.0; - let b_bytes = self.cvt_to_bytes_mask8x64(b).val.0; - let a_blocks = [a_bytes.0, a_bytes.1, a_bytes.2, a_bytes.3]; - let b_blocks = [b_bytes.0, b_bytes.1, b_bytes.2, b_bytes.3]; - let shift_bytes = SHIFT; - uint8x16x4_t( - { - let [lo, hi] = crate::support::cross_block_slide_blocks_at( - &a_blocks, - &b_blocks, - 0, - shift_bytes, - ); - dyn_vext_128(lo, hi, shift_bytes % 16) - }, - { - let [lo, hi] = crate::support::cross_block_slide_blocks_at( - &a_blocks, - &b_blocks, - 1, - shift_bytes, - ); - dyn_vext_128(lo, hi, shift_bytes % 16) - }, - { - let [lo, hi] = crate::support::cross_block_slide_blocks_at( - &a_blocks, - &b_blocks, - 2, - shift_bytes, - ); - dyn_vext_128(lo, hi, shift_bytes % 16) - }, - { - let [lo, hi] = crate::support::cross_block_slide_blocks_at( - &a_blocks, - &b_blocks, - 3, - shift_bytes, - ); - dyn_vext_128(lo, hi, shift_bytes % 16) - }, - ) - }; - self.cvt_from_bytes_mask8x64(u8x64 { - val: crate::support::Aligned512(result), - simd: self, - }) - } - #[inline(always)] - fn slide_within_blocks_mask8x64( - self, - a: mask8x64, - b: mask8x64, - ) -> mask8x64 { - let (a0, a1) = self.split_mask8x64(a); - let (b0, b1) = self.split_mask8x64(b); - self.combine_mask8x32( - self.slide_within_blocks_mask8x32::(a0, b0), - self.slide_within_blocks_mask8x32::(a1, b1), - ) - } - #[inline(always)] fn and_mask8x64(self, a: mask8x64, b: mask8x64) -> mask8x64 { let (a0, a1) = self.split_mask8x64(a); let (b0, b1) = self.split_mask8x64(b); @@ -7721,7 +6942,7 @@ impl Simd for Neon { ) } #[inline(always)] - fn splat_mask16x32(self, val: i16) -> mask16x32 { + fn splat_mask16x32(self, val: bool) -> mask16x32 { let half = self.splat_mask16x16(val); self.combine_mask16x16(half, half) } @@ -7733,125 +6954,10 @@ impl Simd for Neon { } } #[inline(always)] - fn load_array_ref_mask16x32(self, val: &[i16; 32usize]) -> mask16x32 { - mask16x32 { - val: unsafe { core::mem::transmute_copy(val) }, - simd: self, - } - } - #[inline(always)] fn as_array_mask16x32(self, a: mask16x32) -> [i16; 32usize] { unsafe { core::mem::transmute::(a.val.0) } } #[inline(always)] - fn as_array_ref_mask16x32(self, a: &mask16x32) -> &[i16; 32usize] { - unsafe { core::mem::transmute::<&int16x8x4_t, &[i16; 32usize]>(&a.val.0) } - } - #[inline(always)] - fn as_array_mut_mask16x32(self, a: &mut mask16x32) -> &mut [i16; 32usize] { - unsafe { core::mem::transmute::<&mut int16x8x4_t, &mut [i16; 32usize]>(&mut a.val.0) } - } - #[inline(always)] - fn store_array_mask16x32(self, a: mask16x32, dest: &mut [i16; 32usize]) -> () { - unsafe { - core::ptr::copy_nonoverlapping( - (&raw const a.val.0) as *const i16, - dest.as_mut_ptr(), - 32usize, - ); - } - } - #[inline(always)] - fn cvt_from_bytes_mask16x32(self, a: u8x64) -> mask16x32 { - unsafe { - mask16x32 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask16x32(self, a: mask16x32) -> u8x64 { - unsafe { - u8x64 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask16x32( - self, - a: mask16x32, - b: mask16x32, - ) -> mask16x32 { - if SHIFT >= 32usize { - return b; - } - let result = unsafe { - let a_bytes = self.cvt_to_bytes_mask16x32(a).val.0; - let b_bytes = self.cvt_to_bytes_mask16x32(b).val.0; - let a_blocks = [a_bytes.0, a_bytes.1, a_bytes.2, a_bytes.3]; - let b_blocks = [b_bytes.0, b_bytes.1, b_bytes.2, b_bytes.3]; - let shift_bytes = SHIFT * 2usize; - uint8x16x4_t( - { - let [lo, hi] = crate::support::cross_block_slide_blocks_at( - &a_blocks, - &b_blocks, - 0, - shift_bytes, - ); - dyn_vext_128(lo, hi, shift_bytes % 16) - }, - { - let [lo, hi] = crate::support::cross_block_slide_blocks_at( - &a_blocks, - &b_blocks, - 1, - shift_bytes, - ); - dyn_vext_128(lo, hi, shift_bytes % 16) - }, - { - let [lo, hi] = crate::support::cross_block_slide_blocks_at( - &a_blocks, - &b_blocks, - 2, - shift_bytes, - ); - dyn_vext_128(lo, hi, shift_bytes % 16) - }, - { - let [lo, hi] = crate::support::cross_block_slide_blocks_at( - &a_blocks, - &b_blocks, - 3, - shift_bytes, - ); - dyn_vext_128(lo, hi, shift_bytes % 16) - }, - ) - }; - self.cvt_from_bytes_mask16x32(u8x64 { - val: crate::support::Aligned512(result), - simd: self, - }) - } - #[inline(always)] - fn slide_within_blocks_mask16x32( - self, - a: mask16x32, - b: mask16x32, - ) -> mask16x32 { - let (a0, a1) = self.split_mask16x32(a); - let (b0, b1) = self.split_mask16x32(b); - self.combine_mask16x16( - self.slide_within_blocks_mask16x16::(a0, b0), - self.slide_within_blocks_mask16x16::(a1, b1), - ) - } - #[inline(always)] fn and_mask16x32(self, a: mask16x32, b: mask16x32) -> mask16x32 { let (a0, a1) = self.split_mask16x32(a); let (b0, b1) = self.split_mask16x32(b); @@ -8577,7 +7683,7 @@ impl Simd for Neon { self.combine_f32x8(self.cvt_f32_u32x8(a0), self.cvt_f32_u32x8(a1)) } #[inline(always)] - fn splat_mask32x16(self, val: i32) -> mask32x16 { + fn splat_mask32x16(self, val: bool) -> mask32x16 { let half = self.splat_mask32x8(val); self.combine_mask32x8(half, half) } @@ -8589,125 +7695,10 @@ impl Simd for Neon { } } #[inline(always)] - fn load_array_ref_mask32x16(self, val: &[i32; 16usize]) -> mask32x16 { - mask32x16 { - val: unsafe { core::mem::transmute_copy(val) }, - simd: self, - } - } - #[inline(always)] fn as_array_mask32x16(self, a: mask32x16) -> [i32; 16usize] { unsafe { core::mem::transmute::(a.val.0) } } #[inline(always)] - fn as_array_ref_mask32x16(self, a: &mask32x16) -> &[i32; 16usize] { - unsafe { core::mem::transmute::<&int32x4x4_t, &[i32; 16usize]>(&a.val.0) } - } - #[inline(always)] - fn as_array_mut_mask32x16(self, a: &mut mask32x16) -> &mut [i32; 16usize] { - unsafe { core::mem::transmute::<&mut int32x4x4_t, &mut [i32; 16usize]>(&mut a.val.0) } - } - #[inline(always)] - fn store_array_mask32x16(self, a: mask32x16, dest: &mut [i32; 16usize]) -> () { - unsafe { - core::ptr::copy_nonoverlapping( - (&raw const a.val.0) as *const i32, - dest.as_mut_ptr(), - 16usize, - ); - } - } - #[inline(always)] - fn cvt_from_bytes_mask32x16(self, a: u8x64) -> mask32x16 { - unsafe { - mask32x16 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask32x16(self, a: mask32x16) -> u8x64 { - unsafe { - u8x64 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask32x16( - self, - a: mask32x16, - b: mask32x16, - ) -> mask32x16 { - if SHIFT >= 16usize { - return b; - } - let result = unsafe { - let a_bytes = self.cvt_to_bytes_mask32x16(a).val.0; - let b_bytes = self.cvt_to_bytes_mask32x16(b).val.0; - let a_blocks = [a_bytes.0, a_bytes.1, a_bytes.2, a_bytes.3]; - let b_blocks = [b_bytes.0, b_bytes.1, b_bytes.2, b_bytes.3]; - let shift_bytes = SHIFT * 4usize; - uint8x16x4_t( - { - let [lo, hi] = crate::support::cross_block_slide_blocks_at( - &a_blocks, - &b_blocks, - 0, - shift_bytes, - ); - dyn_vext_128(lo, hi, shift_bytes % 16) - }, - { - let [lo, hi] = crate::support::cross_block_slide_blocks_at( - &a_blocks, - &b_blocks, - 1, - shift_bytes, - ); - dyn_vext_128(lo, hi, shift_bytes % 16) - }, - { - let [lo, hi] = crate::support::cross_block_slide_blocks_at( - &a_blocks, - &b_blocks, - 2, - shift_bytes, - ); - dyn_vext_128(lo, hi, shift_bytes % 16) - }, - { - let [lo, hi] = crate::support::cross_block_slide_blocks_at( - &a_blocks, - &b_blocks, - 3, - shift_bytes, - ); - dyn_vext_128(lo, hi, shift_bytes % 16) - }, - ) - }; - self.cvt_from_bytes_mask32x16(u8x64 { - val: crate::support::Aligned512(result), - simd: self, - }) - } - #[inline(always)] - fn slide_within_blocks_mask32x16( - self, - a: mask32x16, - b: mask32x16, - ) -> mask32x16 { - let (a0, a1) = self.split_mask32x16(a); - let (b0, b1) = self.split_mask32x16(b); - self.combine_mask32x8( - self.slide_within_blocks_mask32x8::(a0, b0), - self.slide_within_blocks_mask32x8::(a1, b1), - ) - } - #[inline(always)] fn and_mask32x16(self, a: mask32x16, b: mask32x16) -> mask32x16 { let (a0, a1) = self.split_mask32x16(a); let (b0, b1) = self.split_mask32x16(b); @@ -9143,7 +8134,7 @@ impl Simd for Neon { ) } #[inline(always)] - fn splat_mask64x8(self, val: i64) -> mask64x8 { + fn splat_mask64x8(self, val: bool) -> mask64x8 { let half = self.splat_mask64x4(val); self.combine_mask64x4(half, half) } @@ -9155,125 +8146,10 @@ impl Simd for Neon { } } #[inline(always)] - fn load_array_ref_mask64x8(self, val: &[i64; 8usize]) -> mask64x8 { - mask64x8 { - val: unsafe { core::mem::transmute_copy(val) }, - simd: self, - } - } - #[inline(always)] fn as_array_mask64x8(self, a: mask64x8) -> [i64; 8usize] { unsafe { core::mem::transmute::(a.val.0) } } #[inline(always)] - fn as_array_ref_mask64x8(self, a: &mask64x8) -> &[i64; 8usize] { - unsafe { core::mem::transmute::<&int64x2x4_t, &[i64; 8usize]>(&a.val.0) } - } - #[inline(always)] - fn as_array_mut_mask64x8(self, a: &mut mask64x8) -> &mut [i64; 8usize] { - unsafe { core::mem::transmute::<&mut int64x2x4_t, &mut [i64; 8usize]>(&mut a.val.0) } - } - #[inline(always)] - fn store_array_mask64x8(self, a: mask64x8, dest: &mut [i64; 8usize]) -> () { - unsafe { - core::ptr::copy_nonoverlapping( - (&raw const a.val.0) as *const i64, - dest.as_mut_ptr(), - 8usize, - ); - } - } - #[inline(always)] - fn cvt_from_bytes_mask64x8(self, a: u8x64) -> mask64x8 { - unsafe { - mask64x8 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask64x8(self, a: mask64x8) -> u8x64 { - unsafe { - u8x64 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask64x8( - self, - a: mask64x8, - b: mask64x8, - ) -> mask64x8 { - if SHIFT >= 8usize { - return b; - } - let result = unsafe { - let a_bytes = self.cvt_to_bytes_mask64x8(a).val.0; - let b_bytes = self.cvt_to_bytes_mask64x8(b).val.0; - let a_blocks = [a_bytes.0, a_bytes.1, a_bytes.2, a_bytes.3]; - let b_blocks = [b_bytes.0, b_bytes.1, b_bytes.2, b_bytes.3]; - let shift_bytes = SHIFT * 8usize; - uint8x16x4_t( - { - let [lo, hi] = crate::support::cross_block_slide_blocks_at( - &a_blocks, - &b_blocks, - 0, - shift_bytes, - ); - dyn_vext_128(lo, hi, shift_bytes % 16) - }, - { - let [lo, hi] = crate::support::cross_block_slide_blocks_at( - &a_blocks, - &b_blocks, - 1, - shift_bytes, - ); - dyn_vext_128(lo, hi, shift_bytes % 16) - }, - { - let [lo, hi] = crate::support::cross_block_slide_blocks_at( - &a_blocks, - &b_blocks, - 2, - shift_bytes, - ); - dyn_vext_128(lo, hi, shift_bytes % 16) - }, - { - let [lo, hi] = crate::support::cross_block_slide_blocks_at( - &a_blocks, - &b_blocks, - 3, - shift_bytes, - ); - dyn_vext_128(lo, hi, shift_bytes % 16) - }, - ) - }; - self.cvt_from_bytes_mask64x8(u8x64 { - val: crate::support::Aligned512(result), - simd: self, - }) - } - #[inline(always)] - fn slide_within_blocks_mask64x8( - self, - a: mask64x8, - b: mask64x8, - ) -> mask64x8 { - let (a0, a1) = self.split_mask64x8(a); - let (b0, b1) = self.split_mask64x8(b); - self.combine_mask64x4( - self.slide_within_blocks_mask64x4::(a0, b0), - self.slide_within_blocks_mask64x4::(a1, b1), - ) - } - #[inline(always)] fn and_mask64x8(self, a: mask64x8, b: mask64x8) -> mask64x8 { let (a0, a1) = self.split_mask64x8(a); let (b0, b1) = self.split_mask64x8(b); diff --git a/fearless_simd/src/generated/ops.rs b/fearless_simd/src/generated/ops.rs index 1e6f4075..b05d9918 100644 --- a/fearless_simd/src/generated/ops.rs +++ b/fearless_simd/src/generated/ops.rs @@ -733,26 +733,6 @@ impl core::ops::BitAndAssign for mask8x16 { *self = self.simd.and_mask8x16(*self, rhs); } } -impl core::ops::BitAnd for mask8x16 { - type Output = Self; - #[inline(always)] - fn bitand(self, rhs: i8) -> Self::Output { - self.simd.and_mask8x16(self, rhs.simd_into(self.simd)) - } -} -impl core::ops::BitAndAssign for mask8x16 { - #[inline(always)] - fn bitand_assign(&mut self, rhs: i8) { - *self = self.simd.and_mask8x16(*self, rhs.simd_into(self.simd)); - } -} -impl core::ops::BitAnd> for i8 { - type Output = mask8x16; - #[inline(always)] - fn bitand(self, rhs: mask8x16) -> Self::Output { - rhs.simd.and_mask8x16(self.simd_into(rhs.simd), rhs) - } -} impl core::ops::BitOr for mask8x16 { type Output = Self; #[doc = "Compute the logical OR of two masks."] @@ -768,26 +748,6 @@ impl core::ops::BitOrAssign for mask8x16 { *self = self.simd.or_mask8x16(*self, rhs); } } -impl core::ops::BitOr for mask8x16 { - type Output = Self; - #[inline(always)] - fn bitor(self, rhs: i8) -> Self::Output { - self.simd.or_mask8x16(self, rhs.simd_into(self.simd)) - } -} -impl core::ops::BitOrAssign for mask8x16 { - #[inline(always)] - fn bitor_assign(&mut self, rhs: i8) { - *self = self.simd.or_mask8x16(*self, rhs.simd_into(self.simd)); - } -} -impl core::ops::BitOr> for i8 { - type Output = mask8x16; - #[inline(always)] - fn bitor(self, rhs: mask8x16) -> Self::Output { - rhs.simd.or_mask8x16(self.simd_into(rhs.simd), rhs) - } -} impl core::ops::BitXor for mask8x16 { type Output = Self; #[doc = "Compute the logical XOR of two masks."] @@ -803,26 +763,6 @@ impl core::ops::BitXorAssign for mask8x16 { *self = self.simd.xor_mask8x16(*self, rhs); } } -impl core::ops::BitXor for mask8x16 { - type Output = Self; - #[inline(always)] - fn bitxor(self, rhs: i8) -> Self::Output { - self.simd.xor_mask8x16(self, rhs.simd_into(self.simd)) - } -} -impl core::ops::BitXorAssign for mask8x16 { - #[inline(always)] - fn bitxor_assign(&mut self, rhs: i8) { - *self = self.simd.xor_mask8x16(*self, rhs.simd_into(self.simd)); - } -} -impl core::ops::BitXor> for i8 { - type Output = mask8x16; - #[inline(always)] - fn bitxor(self, rhs: mask8x16) -> Self::Output { - rhs.simd.xor_mask8x16(self.simd_into(rhs.simd), rhs) - } -} impl core::ops::Not for mask8x16 { type Output = Self; #[doc = "Compute the logical NOT of the mask."] @@ -1406,26 +1346,6 @@ impl core::ops::BitAndAssign for mask16x8 { *self = self.simd.and_mask16x8(*self, rhs); } } -impl core::ops::BitAnd for mask16x8 { - type Output = Self; - #[inline(always)] - fn bitand(self, rhs: i16) -> Self::Output { - self.simd.and_mask16x8(self, rhs.simd_into(self.simd)) - } -} -impl core::ops::BitAndAssign for mask16x8 { - #[inline(always)] - fn bitand_assign(&mut self, rhs: i16) { - *self = self.simd.and_mask16x8(*self, rhs.simd_into(self.simd)); - } -} -impl core::ops::BitAnd> for i16 { - type Output = mask16x8; - #[inline(always)] - fn bitand(self, rhs: mask16x8) -> Self::Output { - rhs.simd.and_mask16x8(self.simd_into(rhs.simd), rhs) - } -} impl core::ops::BitOr for mask16x8 { type Output = Self; #[doc = "Compute the logical OR of two masks."] @@ -1441,26 +1361,6 @@ impl core::ops::BitOrAssign for mask16x8 { *self = self.simd.or_mask16x8(*self, rhs); } } -impl core::ops::BitOr for mask16x8 { - type Output = Self; - #[inline(always)] - fn bitor(self, rhs: i16) -> Self::Output { - self.simd.or_mask16x8(self, rhs.simd_into(self.simd)) - } -} -impl core::ops::BitOrAssign for mask16x8 { - #[inline(always)] - fn bitor_assign(&mut self, rhs: i16) { - *self = self.simd.or_mask16x8(*self, rhs.simd_into(self.simd)); - } -} -impl core::ops::BitOr> for i16 { - type Output = mask16x8; - #[inline(always)] - fn bitor(self, rhs: mask16x8) -> Self::Output { - rhs.simd.or_mask16x8(self.simd_into(rhs.simd), rhs) - } -} impl core::ops::BitXor for mask16x8 { type Output = Self; #[doc = "Compute the logical XOR of two masks."] @@ -1476,26 +1376,6 @@ impl core::ops::BitXorAssign for mask16x8 { *self = self.simd.xor_mask16x8(*self, rhs); } } -impl core::ops::BitXor for mask16x8 { - type Output = Self; - #[inline(always)] - fn bitxor(self, rhs: i16) -> Self::Output { - self.simd.xor_mask16x8(self, rhs.simd_into(self.simd)) - } -} -impl core::ops::BitXorAssign for mask16x8 { - #[inline(always)] - fn bitxor_assign(&mut self, rhs: i16) { - *self = self.simd.xor_mask16x8(*self, rhs.simd_into(self.simd)); - } -} -impl core::ops::BitXor> for i16 { - type Output = mask16x8; - #[inline(always)] - fn bitxor(self, rhs: mask16x8) -> Self::Output { - rhs.simd.xor_mask16x8(self.simd_into(rhs.simd), rhs) - } -} impl core::ops::Not for mask16x8 { type Output = Self; #[doc = "Compute the logical NOT of the mask."] @@ -2079,26 +1959,6 @@ impl core::ops::BitAndAssign for mask32x4 { *self = self.simd.and_mask32x4(*self, rhs); } } -impl core::ops::BitAnd for mask32x4 { - type Output = Self; - #[inline(always)] - fn bitand(self, rhs: i32) -> Self::Output { - self.simd.and_mask32x4(self, rhs.simd_into(self.simd)) - } -} -impl core::ops::BitAndAssign for mask32x4 { - #[inline(always)] - fn bitand_assign(&mut self, rhs: i32) { - *self = self.simd.and_mask32x4(*self, rhs.simd_into(self.simd)); - } -} -impl core::ops::BitAnd> for i32 { - type Output = mask32x4; - #[inline(always)] - fn bitand(self, rhs: mask32x4) -> Self::Output { - rhs.simd.and_mask32x4(self.simd_into(rhs.simd), rhs) - } -} impl core::ops::BitOr for mask32x4 { type Output = Self; #[doc = "Compute the logical OR of two masks."] @@ -2114,26 +1974,6 @@ impl core::ops::BitOrAssign for mask32x4 { *self = self.simd.or_mask32x4(*self, rhs); } } -impl core::ops::BitOr for mask32x4 { - type Output = Self; - #[inline(always)] - fn bitor(self, rhs: i32) -> Self::Output { - self.simd.or_mask32x4(self, rhs.simd_into(self.simd)) - } -} -impl core::ops::BitOrAssign for mask32x4 { - #[inline(always)] - fn bitor_assign(&mut self, rhs: i32) { - *self = self.simd.or_mask32x4(*self, rhs.simd_into(self.simd)); - } -} -impl core::ops::BitOr> for i32 { - type Output = mask32x4; - #[inline(always)] - fn bitor(self, rhs: mask32x4) -> Self::Output { - rhs.simd.or_mask32x4(self.simd_into(rhs.simd), rhs) - } -} impl core::ops::BitXor for mask32x4 { type Output = Self; #[doc = "Compute the logical XOR of two masks."] @@ -2149,26 +1989,6 @@ impl core::ops::BitXorAssign for mask32x4 { *self = self.simd.xor_mask32x4(*self, rhs); } } -impl core::ops::BitXor for mask32x4 { - type Output = Self; - #[inline(always)] - fn bitxor(self, rhs: i32) -> Self::Output { - self.simd.xor_mask32x4(self, rhs.simd_into(self.simd)) - } -} -impl core::ops::BitXorAssign for mask32x4 { - #[inline(always)] - fn bitxor_assign(&mut self, rhs: i32) { - *self = self.simd.xor_mask32x4(*self, rhs.simd_into(self.simd)); - } -} -impl core::ops::BitXor> for i32 { - type Output = mask32x4; - #[inline(always)] - fn bitxor(self, rhs: mask32x4) -> Self::Output { - rhs.simd.xor_mask32x4(self.simd_into(rhs.simd), rhs) - } -} impl core::ops::Not for mask32x4 { type Output = Self; #[doc = "Compute the logical NOT of the mask."] @@ -2340,26 +2160,6 @@ impl core::ops::BitAndAssign for mask64x2 { *self = self.simd.and_mask64x2(*self, rhs); } } -impl core::ops::BitAnd for mask64x2 { - type Output = Self; - #[inline(always)] - fn bitand(self, rhs: i64) -> Self::Output { - self.simd.and_mask64x2(self, rhs.simd_into(self.simd)) - } -} -impl core::ops::BitAndAssign for mask64x2 { - #[inline(always)] - fn bitand_assign(&mut self, rhs: i64) { - *self = self.simd.and_mask64x2(*self, rhs.simd_into(self.simd)); - } -} -impl core::ops::BitAnd> for i64 { - type Output = mask64x2; - #[inline(always)] - fn bitand(self, rhs: mask64x2) -> Self::Output { - rhs.simd.and_mask64x2(self.simd_into(rhs.simd), rhs) - } -} impl core::ops::BitOr for mask64x2 { type Output = Self; #[doc = "Compute the logical OR of two masks."] @@ -2375,26 +2175,6 @@ impl core::ops::BitOrAssign for mask64x2 { *self = self.simd.or_mask64x2(*self, rhs); } } -impl core::ops::BitOr for mask64x2 { - type Output = Self; - #[inline(always)] - fn bitor(self, rhs: i64) -> Self::Output { - self.simd.or_mask64x2(self, rhs.simd_into(self.simd)) - } -} -impl core::ops::BitOrAssign for mask64x2 { - #[inline(always)] - fn bitor_assign(&mut self, rhs: i64) { - *self = self.simd.or_mask64x2(*self, rhs.simd_into(self.simd)); - } -} -impl core::ops::BitOr> for i64 { - type Output = mask64x2; - #[inline(always)] - fn bitor(self, rhs: mask64x2) -> Self::Output { - rhs.simd.or_mask64x2(self.simd_into(rhs.simd), rhs) - } -} impl core::ops::BitXor for mask64x2 { type Output = Self; #[doc = "Compute the logical XOR of two masks."] @@ -2410,26 +2190,6 @@ impl core::ops::BitXorAssign for mask64x2 { *self = self.simd.xor_mask64x2(*self, rhs); } } -impl core::ops::BitXor for mask64x2 { - type Output = Self; - #[inline(always)] - fn bitxor(self, rhs: i64) -> Self::Output { - self.simd.xor_mask64x2(self, rhs.simd_into(self.simd)) - } -} -impl core::ops::BitXorAssign for mask64x2 { - #[inline(always)] - fn bitxor_assign(&mut self, rhs: i64) { - *self = self.simd.xor_mask64x2(*self, rhs.simd_into(self.simd)); - } -} -impl core::ops::BitXor> for i64 { - type Output = mask64x2; - #[inline(always)] - fn bitxor(self, rhs: mask64x2) -> Self::Output { - rhs.simd.xor_mask64x2(self.simd_into(rhs.simd), rhs) - } -} impl core::ops::Not for mask64x2 { type Output = Self; #[doc = "Compute the logical NOT of the mask."] @@ -3161,26 +2921,6 @@ impl core::ops::BitAndAssign for mask8x32 { *self = self.simd.and_mask8x32(*self, rhs); } } -impl core::ops::BitAnd for mask8x32 { - type Output = Self; - #[inline(always)] - fn bitand(self, rhs: i8) -> Self::Output { - self.simd.and_mask8x32(self, rhs.simd_into(self.simd)) - } -} -impl core::ops::BitAndAssign for mask8x32 { - #[inline(always)] - fn bitand_assign(&mut self, rhs: i8) { - *self = self.simd.and_mask8x32(*self, rhs.simd_into(self.simd)); - } -} -impl core::ops::BitAnd> for i8 { - type Output = mask8x32; - #[inline(always)] - fn bitand(self, rhs: mask8x32) -> Self::Output { - rhs.simd.and_mask8x32(self.simd_into(rhs.simd), rhs) - } -} impl core::ops::BitOr for mask8x32 { type Output = Self; #[doc = "Compute the logical OR of two masks."] @@ -3196,26 +2936,6 @@ impl core::ops::BitOrAssign for mask8x32 { *self = self.simd.or_mask8x32(*self, rhs); } } -impl core::ops::BitOr for mask8x32 { - type Output = Self; - #[inline(always)] - fn bitor(self, rhs: i8) -> Self::Output { - self.simd.or_mask8x32(self, rhs.simd_into(self.simd)) - } -} -impl core::ops::BitOrAssign for mask8x32 { - #[inline(always)] - fn bitor_assign(&mut self, rhs: i8) { - *self = self.simd.or_mask8x32(*self, rhs.simd_into(self.simd)); - } -} -impl core::ops::BitOr> for i8 { - type Output = mask8x32; - #[inline(always)] - fn bitor(self, rhs: mask8x32) -> Self::Output { - rhs.simd.or_mask8x32(self.simd_into(rhs.simd), rhs) - } -} impl core::ops::BitXor for mask8x32 { type Output = Self; #[doc = "Compute the logical XOR of two masks."] @@ -3231,26 +2951,6 @@ impl core::ops::BitXorAssign for mask8x32 { *self = self.simd.xor_mask8x32(*self, rhs); } } -impl core::ops::BitXor for mask8x32 { - type Output = Self; - #[inline(always)] - fn bitxor(self, rhs: i8) -> Self::Output { - self.simd.xor_mask8x32(self, rhs.simd_into(self.simd)) - } -} -impl core::ops::BitXorAssign for mask8x32 { - #[inline(always)] - fn bitxor_assign(&mut self, rhs: i8) { - *self = self.simd.xor_mask8x32(*self, rhs.simd_into(self.simd)); - } -} -impl core::ops::BitXor> for i8 { - type Output = mask8x32; - #[inline(always)] - fn bitxor(self, rhs: mask8x32) -> Self::Output { - rhs.simd.xor_mask8x32(self.simd_into(rhs.simd), rhs) - } -} impl core::ops::Not for mask8x32 { type Output = Self; #[doc = "Compute the logical NOT of the mask."] @@ -3834,26 +3534,6 @@ impl core::ops::BitAndAssign for mask16x16 { *self = self.simd.and_mask16x16(*self, rhs); } } -impl core::ops::BitAnd for mask16x16 { - type Output = Self; - #[inline(always)] - fn bitand(self, rhs: i16) -> Self::Output { - self.simd.and_mask16x16(self, rhs.simd_into(self.simd)) - } -} -impl core::ops::BitAndAssign for mask16x16 { - #[inline(always)] - fn bitand_assign(&mut self, rhs: i16) { - *self = self.simd.and_mask16x16(*self, rhs.simd_into(self.simd)); - } -} -impl core::ops::BitAnd> for i16 { - type Output = mask16x16; - #[inline(always)] - fn bitand(self, rhs: mask16x16) -> Self::Output { - rhs.simd.and_mask16x16(self.simd_into(rhs.simd), rhs) - } -} impl core::ops::BitOr for mask16x16 { type Output = Self; #[doc = "Compute the logical OR of two masks."] @@ -3869,26 +3549,6 @@ impl core::ops::BitOrAssign for mask16x16 { *self = self.simd.or_mask16x16(*self, rhs); } } -impl core::ops::BitOr for mask16x16 { - type Output = Self; - #[inline(always)] - fn bitor(self, rhs: i16) -> Self::Output { - self.simd.or_mask16x16(self, rhs.simd_into(self.simd)) - } -} -impl core::ops::BitOrAssign for mask16x16 { - #[inline(always)] - fn bitor_assign(&mut self, rhs: i16) { - *self = self.simd.or_mask16x16(*self, rhs.simd_into(self.simd)); - } -} -impl core::ops::BitOr> for i16 { - type Output = mask16x16; - #[inline(always)] - fn bitor(self, rhs: mask16x16) -> Self::Output { - rhs.simd.or_mask16x16(self.simd_into(rhs.simd), rhs) - } -} impl core::ops::BitXor for mask16x16 { type Output = Self; #[doc = "Compute the logical XOR of two masks."] @@ -3904,26 +3564,6 @@ impl core::ops::BitXorAssign for mask16x16 { *self = self.simd.xor_mask16x16(*self, rhs); } } -impl core::ops::BitXor for mask16x16 { - type Output = Self; - #[inline(always)] - fn bitxor(self, rhs: i16) -> Self::Output { - self.simd.xor_mask16x16(self, rhs.simd_into(self.simd)) - } -} -impl core::ops::BitXorAssign for mask16x16 { - #[inline(always)] - fn bitxor_assign(&mut self, rhs: i16) { - *self = self.simd.xor_mask16x16(*self, rhs.simd_into(self.simd)); - } -} -impl core::ops::BitXor> for i16 { - type Output = mask16x16; - #[inline(always)] - fn bitxor(self, rhs: mask16x16) -> Self::Output { - rhs.simd.xor_mask16x16(self.simd_into(rhs.simd), rhs) - } -} impl core::ops::Not for mask16x16 { type Output = Self; #[doc = "Compute the logical NOT of the mask."] @@ -4507,26 +4147,6 @@ impl core::ops::BitAndAssign for mask32x8 { *self = self.simd.and_mask32x8(*self, rhs); } } -impl core::ops::BitAnd for mask32x8 { - type Output = Self; - #[inline(always)] - fn bitand(self, rhs: i32) -> Self::Output { - self.simd.and_mask32x8(self, rhs.simd_into(self.simd)) - } -} -impl core::ops::BitAndAssign for mask32x8 { - #[inline(always)] - fn bitand_assign(&mut self, rhs: i32) { - *self = self.simd.and_mask32x8(*self, rhs.simd_into(self.simd)); - } -} -impl core::ops::BitAnd> for i32 { - type Output = mask32x8; - #[inline(always)] - fn bitand(self, rhs: mask32x8) -> Self::Output { - rhs.simd.and_mask32x8(self.simd_into(rhs.simd), rhs) - } -} impl core::ops::BitOr for mask32x8 { type Output = Self; #[doc = "Compute the logical OR of two masks."] @@ -4542,26 +4162,6 @@ impl core::ops::BitOrAssign for mask32x8 { *self = self.simd.or_mask32x8(*self, rhs); } } -impl core::ops::BitOr for mask32x8 { - type Output = Self; - #[inline(always)] - fn bitor(self, rhs: i32) -> Self::Output { - self.simd.or_mask32x8(self, rhs.simd_into(self.simd)) - } -} -impl core::ops::BitOrAssign for mask32x8 { - #[inline(always)] - fn bitor_assign(&mut self, rhs: i32) { - *self = self.simd.or_mask32x8(*self, rhs.simd_into(self.simd)); - } -} -impl core::ops::BitOr> for i32 { - type Output = mask32x8; - #[inline(always)] - fn bitor(self, rhs: mask32x8) -> Self::Output { - rhs.simd.or_mask32x8(self.simd_into(rhs.simd), rhs) - } -} impl core::ops::BitXor for mask32x8 { type Output = Self; #[doc = "Compute the logical XOR of two masks."] @@ -4577,26 +4177,6 @@ impl core::ops::BitXorAssign for mask32x8 { *self = self.simd.xor_mask32x8(*self, rhs); } } -impl core::ops::BitXor for mask32x8 { - type Output = Self; - #[inline(always)] - fn bitxor(self, rhs: i32) -> Self::Output { - self.simd.xor_mask32x8(self, rhs.simd_into(self.simd)) - } -} -impl core::ops::BitXorAssign for mask32x8 { - #[inline(always)] - fn bitxor_assign(&mut self, rhs: i32) { - *self = self.simd.xor_mask32x8(*self, rhs.simd_into(self.simd)); - } -} -impl core::ops::BitXor> for i32 { - type Output = mask32x8; - #[inline(always)] - fn bitxor(self, rhs: mask32x8) -> Self::Output { - rhs.simd.xor_mask32x8(self.simd_into(rhs.simd), rhs) - } -} impl core::ops::Not for mask32x8 { type Output = Self; #[doc = "Compute the logical NOT of the mask."] @@ -4768,26 +4348,6 @@ impl core::ops::BitAndAssign for mask64x4 { *self = self.simd.and_mask64x4(*self, rhs); } } -impl core::ops::BitAnd for mask64x4 { - type Output = Self; - #[inline(always)] - fn bitand(self, rhs: i64) -> Self::Output { - self.simd.and_mask64x4(self, rhs.simd_into(self.simd)) - } -} -impl core::ops::BitAndAssign for mask64x4 { - #[inline(always)] - fn bitand_assign(&mut self, rhs: i64) { - *self = self.simd.and_mask64x4(*self, rhs.simd_into(self.simd)); - } -} -impl core::ops::BitAnd> for i64 { - type Output = mask64x4; - #[inline(always)] - fn bitand(self, rhs: mask64x4) -> Self::Output { - rhs.simd.and_mask64x4(self.simd_into(rhs.simd), rhs) - } -} impl core::ops::BitOr for mask64x4 { type Output = Self; #[doc = "Compute the logical OR of two masks."] @@ -4803,26 +4363,6 @@ impl core::ops::BitOrAssign for mask64x4 { *self = self.simd.or_mask64x4(*self, rhs); } } -impl core::ops::BitOr for mask64x4 { - type Output = Self; - #[inline(always)] - fn bitor(self, rhs: i64) -> Self::Output { - self.simd.or_mask64x4(self, rhs.simd_into(self.simd)) - } -} -impl core::ops::BitOrAssign for mask64x4 { - #[inline(always)] - fn bitor_assign(&mut self, rhs: i64) { - *self = self.simd.or_mask64x4(*self, rhs.simd_into(self.simd)); - } -} -impl core::ops::BitOr> for i64 { - type Output = mask64x4; - #[inline(always)] - fn bitor(self, rhs: mask64x4) -> Self::Output { - rhs.simd.or_mask64x4(self.simd_into(rhs.simd), rhs) - } -} impl core::ops::BitXor for mask64x4 { type Output = Self; #[doc = "Compute the logical XOR of two masks."] @@ -4838,26 +4378,6 @@ impl core::ops::BitXorAssign for mask64x4 { *self = self.simd.xor_mask64x4(*self, rhs); } } -impl core::ops::BitXor for mask64x4 { - type Output = Self; - #[inline(always)] - fn bitxor(self, rhs: i64) -> Self::Output { - self.simd.xor_mask64x4(self, rhs.simd_into(self.simd)) - } -} -impl core::ops::BitXorAssign for mask64x4 { - #[inline(always)] - fn bitxor_assign(&mut self, rhs: i64) { - *self = self.simd.xor_mask64x4(*self, rhs.simd_into(self.simd)); - } -} -impl core::ops::BitXor> for i64 { - type Output = mask64x4; - #[inline(always)] - fn bitxor(self, rhs: mask64x4) -> Self::Output { - rhs.simd.xor_mask64x4(self.simd_into(rhs.simd), rhs) - } -} impl core::ops::Not for mask64x4 { type Output = Self; #[doc = "Compute the logical NOT of the mask."] @@ -5589,26 +5109,6 @@ impl core::ops::BitAndAssign for mask8x64 { *self = self.simd.and_mask8x64(*self, rhs); } } -impl core::ops::BitAnd for mask8x64 { - type Output = Self; - #[inline(always)] - fn bitand(self, rhs: i8) -> Self::Output { - self.simd.and_mask8x64(self, rhs.simd_into(self.simd)) - } -} -impl core::ops::BitAndAssign for mask8x64 { - #[inline(always)] - fn bitand_assign(&mut self, rhs: i8) { - *self = self.simd.and_mask8x64(*self, rhs.simd_into(self.simd)); - } -} -impl core::ops::BitAnd> for i8 { - type Output = mask8x64; - #[inline(always)] - fn bitand(self, rhs: mask8x64) -> Self::Output { - rhs.simd.and_mask8x64(self.simd_into(rhs.simd), rhs) - } -} impl core::ops::BitOr for mask8x64 { type Output = Self; #[doc = "Compute the logical OR of two masks."] @@ -5624,26 +5124,6 @@ impl core::ops::BitOrAssign for mask8x64 { *self = self.simd.or_mask8x64(*self, rhs); } } -impl core::ops::BitOr for mask8x64 { - type Output = Self; - #[inline(always)] - fn bitor(self, rhs: i8) -> Self::Output { - self.simd.or_mask8x64(self, rhs.simd_into(self.simd)) - } -} -impl core::ops::BitOrAssign for mask8x64 { - #[inline(always)] - fn bitor_assign(&mut self, rhs: i8) { - *self = self.simd.or_mask8x64(*self, rhs.simd_into(self.simd)); - } -} -impl core::ops::BitOr> for i8 { - type Output = mask8x64; - #[inline(always)] - fn bitor(self, rhs: mask8x64) -> Self::Output { - rhs.simd.or_mask8x64(self.simd_into(rhs.simd), rhs) - } -} impl core::ops::BitXor for mask8x64 { type Output = Self; #[doc = "Compute the logical XOR of two masks."] @@ -5659,26 +5139,6 @@ impl core::ops::BitXorAssign for mask8x64 { *self = self.simd.xor_mask8x64(*self, rhs); } } -impl core::ops::BitXor for mask8x64 { - type Output = Self; - #[inline(always)] - fn bitxor(self, rhs: i8) -> Self::Output { - self.simd.xor_mask8x64(self, rhs.simd_into(self.simd)) - } -} -impl core::ops::BitXorAssign for mask8x64 { - #[inline(always)] - fn bitxor_assign(&mut self, rhs: i8) { - *self = self.simd.xor_mask8x64(*self, rhs.simd_into(self.simd)); - } -} -impl core::ops::BitXor> for i8 { - type Output = mask8x64; - #[inline(always)] - fn bitxor(self, rhs: mask8x64) -> Self::Output { - rhs.simd.xor_mask8x64(self.simd_into(rhs.simd), rhs) - } -} impl core::ops::Not for mask8x64 { type Output = Self; #[doc = "Compute the logical NOT of the mask."] @@ -6262,26 +5722,6 @@ impl core::ops::BitAndAssign for mask16x32 { *self = self.simd.and_mask16x32(*self, rhs); } } -impl core::ops::BitAnd for mask16x32 { - type Output = Self; - #[inline(always)] - fn bitand(self, rhs: i16) -> Self::Output { - self.simd.and_mask16x32(self, rhs.simd_into(self.simd)) - } -} -impl core::ops::BitAndAssign for mask16x32 { - #[inline(always)] - fn bitand_assign(&mut self, rhs: i16) { - *self = self.simd.and_mask16x32(*self, rhs.simd_into(self.simd)); - } -} -impl core::ops::BitAnd> for i16 { - type Output = mask16x32; - #[inline(always)] - fn bitand(self, rhs: mask16x32) -> Self::Output { - rhs.simd.and_mask16x32(self.simd_into(rhs.simd), rhs) - } -} impl core::ops::BitOr for mask16x32 { type Output = Self; #[doc = "Compute the logical OR of two masks."] @@ -6297,26 +5737,6 @@ impl core::ops::BitOrAssign for mask16x32 { *self = self.simd.or_mask16x32(*self, rhs); } } -impl core::ops::BitOr for mask16x32 { - type Output = Self; - #[inline(always)] - fn bitor(self, rhs: i16) -> Self::Output { - self.simd.or_mask16x32(self, rhs.simd_into(self.simd)) - } -} -impl core::ops::BitOrAssign for mask16x32 { - #[inline(always)] - fn bitor_assign(&mut self, rhs: i16) { - *self = self.simd.or_mask16x32(*self, rhs.simd_into(self.simd)); - } -} -impl core::ops::BitOr> for i16 { - type Output = mask16x32; - #[inline(always)] - fn bitor(self, rhs: mask16x32) -> Self::Output { - rhs.simd.or_mask16x32(self.simd_into(rhs.simd), rhs) - } -} impl core::ops::BitXor for mask16x32 { type Output = Self; #[doc = "Compute the logical XOR of two masks."] @@ -6332,26 +5752,6 @@ impl core::ops::BitXorAssign for mask16x32 { *self = self.simd.xor_mask16x32(*self, rhs); } } -impl core::ops::BitXor for mask16x32 { - type Output = Self; - #[inline(always)] - fn bitxor(self, rhs: i16) -> Self::Output { - self.simd.xor_mask16x32(self, rhs.simd_into(self.simd)) - } -} -impl core::ops::BitXorAssign for mask16x32 { - #[inline(always)] - fn bitxor_assign(&mut self, rhs: i16) { - *self = self.simd.xor_mask16x32(*self, rhs.simd_into(self.simd)); - } -} -impl core::ops::BitXor> for i16 { - type Output = mask16x32; - #[inline(always)] - fn bitxor(self, rhs: mask16x32) -> Self::Output { - rhs.simd.xor_mask16x32(self.simd_into(rhs.simd), rhs) - } -} impl core::ops::Not for mask16x32 { type Output = Self; #[doc = "Compute the logical NOT of the mask."] @@ -6935,26 +6335,6 @@ impl core::ops::BitAndAssign for mask32x16 { *self = self.simd.and_mask32x16(*self, rhs); } } -impl core::ops::BitAnd for mask32x16 { - type Output = Self; - #[inline(always)] - fn bitand(self, rhs: i32) -> Self::Output { - self.simd.and_mask32x16(self, rhs.simd_into(self.simd)) - } -} -impl core::ops::BitAndAssign for mask32x16 { - #[inline(always)] - fn bitand_assign(&mut self, rhs: i32) { - *self = self.simd.and_mask32x16(*self, rhs.simd_into(self.simd)); - } -} -impl core::ops::BitAnd> for i32 { - type Output = mask32x16; - #[inline(always)] - fn bitand(self, rhs: mask32x16) -> Self::Output { - rhs.simd.and_mask32x16(self.simd_into(rhs.simd), rhs) - } -} impl core::ops::BitOr for mask32x16 { type Output = Self; #[doc = "Compute the logical OR of two masks."] @@ -6970,26 +6350,6 @@ impl core::ops::BitOrAssign for mask32x16 { *self = self.simd.or_mask32x16(*self, rhs); } } -impl core::ops::BitOr for mask32x16 { - type Output = Self; - #[inline(always)] - fn bitor(self, rhs: i32) -> Self::Output { - self.simd.or_mask32x16(self, rhs.simd_into(self.simd)) - } -} -impl core::ops::BitOrAssign for mask32x16 { - #[inline(always)] - fn bitor_assign(&mut self, rhs: i32) { - *self = self.simd.or_mask32x16(*self, rhs.simd_into(self.simd)); - } -} -impl core::ops::BitOr> for i32 { - type Output = mask32x16; - #[inline(always)] - fn bitor(self, rhs: mask32x16) -> Self::Output { - rhs.simd.or_mask32x16(self.simd_into(rhs.simd), rhs) - } -} impl core::ops::BitXor for mask32x16 { type Output = Self; #[doc = "Compute the logical XOR of two masks."] @@ -7005,26 +6365,6 @@ impl core::ops::BitXorAssign for mask32x16 { *self = self.simd.xor_mask32x16(*self, rhs); } } -impl core::ops::BitXor for mask32x16 { - type Output = Self; - #[inline(always)] - fn bitxor(self, rhs: i32) -> Self::Output { - self.simd.xor_mask32x16(self, rhs.simd_into(self.simd)) - } -} -impl core::ops::BitXorAssign for mask32x16 { - #[inline(always)] - fn bitxor_assign(&mut self, rhs: i32) { - *self = self.simd.xor_mask32x16(*self, rhs.simd_into(self.simd)); - } -} -impl core::ops::BitXor> for i32 { - type Output = mask32x16; - #[inline(always)] - fn bitxor(self, rhs: mask32x16) -> Self::Output { - rhs.simd.xor_mask32x16(self.simd_into(rhs.simd), rhs) - } -} impl core::ops::Not for mask32x16 { type Output = Self; #[doc = "Compute the logical NOT of the mask."] @@ -7196,26 +6536,6 @@ impl core::ops::BitAndAssign for mask64x8 { *self = self.simd.and_mask64x8(*self, rhs); } } -impl core::ops::BitAnd for mask64x8 { - type Output = Self; - #[inline(always)] - fn bitand(self, rhs: i64) -> Self::Output { - self.simd.and_mask64x8(self, rhs.simd_into(self.simd)) - } -} -impl core::ops::BitAndAssign for mask64x8 { - #[inline(always)] - fn bitand_assign(&mut self, rhs: i64) { - *self = self.simd.and_mask64x8(*self, rhs.simd_into(self.simd)); - } -} -impl core::ops::BitAnd> for i64 { - type Output = mask64x8; - #[inline(always)] - fn bitand(self, rhs: mask64x8) -> Self::Output { - rhs.simd.and_mask64x8(self.simd_into(rhs.simd), rhs) - } -} impl core::ops::BitOr for mask64x8 { type Output = Self; #[doc = "Compute the logical OR of two masks."] @@ -7231,26 +6551,6 @@ impl core::ops::BitOrAssign for mask64x8 { *self = self.simd.or_mask64x8(*self, rhs); } } -impl core::ops::BitOr for mask64x8 { - type Output = Self; - #[inline(always)] - fn bitor(self, rhs: i64) -> Self::Output { - self.simd.or_mask64x8(self, rhs.simd_into(self.simd)) - } -} -impl core::ops::BitOrAssign for mask64x8 { - #[inline(always)] - fn bitor_assign(&mut self, rhs: i64) { - *self = self.simd.or_mask64x8(*self, rhs.simd_into(self.simd)); - } -} -impl core::ops::BitOr> for i64 { - type Output = mask64x8; - #[inline(always)] - fn bitor(self, rhs: mask64x8) -> Self::Output { - rhs.simd.or_mask64x8(self.simd_into(rhs.simd), rhs) - } -} impl core::ops::BitXor for mask64x8 { type Output = Self; #[doc = "Compute the logical XOR of two masks."] @@ -7266,26 +6566,6 @@ impl core::ops::BitXorAssign for mask64x8 { *self = self.simd.xor_mask64x8(*self, rhs); } } -impl core::ops::BitXor for mask64x8 { - type Output = Self; - #[inline(always)] - fn bitxor(self, rhs: i64) -> Self::Output { - self.simd.xor_mask64x8(self, rhs.simd_into(self.simd)) - } -} -impl core::ops::BitXorAssign for mask64x8 { - #[inline(always)] - fn bitxor_assign(&mut self, rhs: i64) { - *self = self.simd.xor_mask64x8(*self, rhs.simd_into(self.simd)); - } -} -impl core::ops::BitXor> for i64 { - type Output = mask64x8; - #[inline(always)] - fn bitxor(self, rhs: mask64x8) -> Self::Output { - rhs.simd.xor_mask64x8(self.simd_into(rhs.simd), rhs) - } -} impl core::ops::Not for mask64x8 { type Output = Self; #[doc = "Compute the logical NOT of the mask."] diff --git a/fearless_simd/src/generated/simd_trait.rs b/fearless_simd/src/generated/simd_trait.rs index 370cf762..01a5ee0f 100644 --- a/fearless_simd/src/generated/simd_trait.rs +++ b/fearless_simd/src/generated/simd_trait.rs @@ -95,25 +95,23 @@ pub trait Simd: > + SimdCvtTruncate + core::ops::Neg; #[doc = r" A native-width SIMD mask with 8-bit lanes."] - type mask8s: SimdMask, Bytes = ::Bytes> + type mask8s: SimdMask + Select + Select + Select; #[doc = r" A native-width SIMD mask with 16-bit lanes."] - type mask16s: SimdMask, Bytes = ::Bytes> + type mask16s: SimdMask + Select + Select + Select; #[doc = r" A native-width SIMD mask with 32-bit lanes."] - type mask32s: SimdMask, Bytes = ::Bytes> + type mask32s: SimdMask + Select + Select + Select + Select; #[doc = r" A native-width SIMD mask with 64-bit lanes."] - type mask64s: SimdMask> - + Select - + Select; + type mask64s: SimdMask + Select + Select; #[doc = r" This SIMD token's feature level."] fn level(self) -> Level; #[doc = r" Call function with CPU features enabled."] @@ -162,15 +160,15 @@ pub trait Simd: fn div_f32x4(self, a: f32x4, b: f32x4) -> f32x4; #[doc = "Return a vector with the magnitude of `a` and the sign of `b` for each element.\n\nThis operation copies the sign bit, so if an input element is NaN, the output element will be a NaN with the same payload and a copied sign bit."] fn copysign_f32x4(self, a: f32x4, b: f32x4) -> f32x4; - #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each element is all ones if the corresponding elements are equal, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each logical lane is true if the corresponding elements are equal, and false if not."] fn simd_eq_f32x4(self, a: f32x4, b: f32x4) -> mask32x4; - #[doc = "Compare two vectors element-wise for less than.\n\nReturns a mask where each element is all ones if `a` is less than `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for less than.\n\nReturns a mask where each logical lane is true if `a` is less than `b`, and false if not."] fn simd_lt_f32x4(self, a: f32x4, b: f32x4) -> mask32x4; - #[doc = "Compare two vectors element-wise for less than or equal.\n\nReturns a mask where each element is all ones if `a` is less than or equal to `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for less than or equal.\n\nReturns a mask where each logical lane is true if `a` is less than or equal to `b`, and false if not."] fn simd_le_f32x4(self, a: f32x4, b: f32x4) -> mask32x4; - #[doc = "Compare two vectors element-wise for greater than or equal.\n\nReturns a mask where each element is all ones if `a` is greater than or equal to `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for greater than or equal.\n\nReturns a mask where each logical lane is true if `a` is greater than or equal to `b`, and false if not."] fn simd_ge_f32x4(self, a: f32x4, b: f32x4) -> mask32x4; - #[doc = "Compare two vectors element-wise for greater than.\n\nReturns a mask where each element is all ones if `a` is greater than `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for greater than.\n\nReturns a mask where each logical lane is true if `a` is greater than `b`, and false if not."] fn simd_gt_f32x4(self, a: f32x4, b: f32x4) -> mask32x4; #[doc = "Interleave the lower half elements of two vectors.\n\nFor vectors `[a0, a1, a2, a3]` and `[b0, b1, b2, b3]`, returns `[a0, b0, a1, b1]`.\n\n**Note:** This operation is only useful if you need to discard elements `a2, a3, b2, b3`.\n For fully interleaving two vectors prefer `interleave`,\n which is faster than `zip_low` followed by `zip_high` on some platforms."] fn zip_low_f32x4(self, a: f32x4, b: f32x4) -> f32x4; @@ -206,7 +204,7 @@ pub trait Simd: fn fract_f32x4(self, a: f32x4) -> f32x4; #[doc = "Return the integer part of each element, rounding towards zero."] fn trunc_f32x4(self, a: f32x4) -> f32x4; - #[doc = "Select elements from b and c based on the mask operand a.\n\nThis operation's behavior is unspecified if each lane of a is not the all-zeroes or all-ones bit pattern. See the [`Select`] trait's documentation for more information."] + #[doc = "Select elements from b and c based on the mask operand a.\n\nThis operation's behavior is unspecified if a was constructed from signed integer lanes that are neither all-zeroes (integer value 0) nor all-ones (integer value -1). See the [`Select`] trait's documentation for more information."] fn select_f32x4(self, a: mask32x4, b: f32x4, c: f32x4) -> f32x4; #[doc = "Combine two vectors into a single vector with twice the width.\n\n`a` provides the lower elements and `b` provides the upper elements."] fn combine_f32x4(self, a: f32x4, b: f32x4) -> f32x8; @@ -274,15 +272,15 @@ pub trait Simd: fn shr_i8x16(self, a: i8x16, shift: u32) -> i8x16; #[doc = "Shift each element right by the corresponding element in another vector.\n\nFor unsigned integers, zeros are shifted in on the left. For signed integers, the sign bit is replicated.\n\nThis operation is not implemented in hardware on all platforms. On WebAssembly, and on x86 platforms without AVX2, this will use a fallback scalar implementation."] fn shrv_i8x16(self, a: i8x16, b: i8x16) -> i8x16; - #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each element is all ones if the corresponding elements are equal, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each logical lane is true if the corresponding elements are equal, and false if not."] fn simd_eq_i8x16(self, a: i8x16, b: i8x16) -> mask8x16; - #[doc = "Compare two vectors element-wise for less than.\n\nReturns a mask where each element is all ones if `a` is less than `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for less than.\n\nReturns a mask where each logical lane is true if `a` is less than `b`, and false if not."] fn simd_lt_i8x16(self, a: i8x16, b: i8x16) -> mask8x16; - #[doc = "Compare two vectors element-wise for less than or equal.\n\nReturns a mask where each element is all ones if `a` is less than or equal to `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for less than or equal.\n\nReturns a mask where each logical lane is true if `a` is less than or equal to `b`, and false if not."] fn simd_le_i8x16(self, a: i8x16, b: i8x16) -> mask8x16; - #[doc = "Compare two vectors element-wise for greater than or equal.\n\nReturns a mask where each element is all ones if `a` is greater than or equal to `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for greater than or equal.\n\nReturns a mask where each logical lane is true if `a` is greater than or equal to `b`, and false if not."] fn simd_ge_i8x16(self, a: i8x16, b: i8x16) -> mask8x16; - #[doc = "Compare two vectors element-wise for greater than.\n\nReturns a mask where each element is all ones if `a` is greater than `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for greater than.\n\nReturns a mask where each logical lane is true if `a` is greater than `b`, and false if not."] fn simd_gt_i8x16(self, a: i8x16, b: i8x16) -> mask8x16; #[doc = "Interleave the lower half elements of two vectors.\n\nFor vectors `[a0, a1, a2, a3]` and `[b0, b1, b2, b3]`, returns `[a0, b0, a1, b1]`.\n\n**Note:** This operation is only useful if you need to discard elements `a2, a3, b2, b3`.\n For fully interleaving two vectors prefer `interleave`,\n which is faster than `zip_low` followed by `zip_high` on some platforms."] fn zip_low_i8x16(self, a: i8x16, b: i8x16) -> i8x16; @@ -296,7 +294,7 @@ pub trait Simd: fn interleave_i8x16(self, a: i8x16, b: i8x16) -> (i8x16, i8x16); #[doc = "Deinterleave two vectors.\n\nThe first result contains all even-indexed elements from `a` followed by all even-indexed elements from `b`. The second result contains all odd-indexed elements from `a` followed by all odd-indexed elements from `b`.\n\nThe reverse of this operation is `interleave`.\n\nFor vectors `[a0, b0, a1, b1]` and `[a2, b2, a3, b3]`, returns `([a0, a1, a2, a3], [b0, b1, b2, b3])`."] fn deinterleave_i8x16(self, a: i8x16, b: i8x16) -> (i8x16, i8x16); - #[doc = "Select elements from b and c based on the mask operand a.\n\nThis operation's behavior is unspecified if each lane of a is not the all-zeroes or all-ones bit pattern. See the [`Select`] trait's documentation for more information."] + #[doc = "Select elements from b and c based on the mask operand a.\n\nThis operation's behavior is unspecified if a was constructed from signed integer lanes that are neither all-zeroes (integer value 0) nor all-ones (integer value -1). See the [`Select`] trait's documentation for more information."] fn select_i8x16(self, a: mask8x16, b: i8x16, c: i8x16) -> i8x16; #[doc = "Return the element-wise minimum of two vectors."] fn min_i8x16(self, a: i8x16, b: i8x16) -> i8x16; @@ -358,15 +356,15 @@ pub trait Simd: fn shr_u8x16(self, a: u8x16, shift: u32) -> u8x16; #[doc = "Shift each element right by the corresponding element in another vector.\n\nFor unsigned integers, zeros are shifted in on the left. For signed integers, the sign bit is replicated.\n\nThis operation is not implemented in hardware on all platforms. On WebAssembly, and on x86 platforms without AVX2, this will use a fallback scalar implementation."] fn shrv_u8x16(self, a: u8x16, b: u8x16) -> u8x16; - #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each element is all ones if the corresponding elements are equal, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each logical lane is true if the corresponding elements are equal, and false if not."] fn simd_eq_u8x16(self, a: u8x16, b: u8x16) -> mask8x16; - #[doc = "Compare two vectors element-wise for less than.\n\nReturns a mask where each element is all ones if `a` is less than `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for less than.\n\nReturns a mask where each logical lane is true if `a` is less than `b`, and false if not."] fn simd_lt_u8x16(self, a: u8x16, b: u8x16) -> mask8x16; - #[doc = "Compare two vectors element-wise for less than or equal.\n\nReturns a mask where each element is all ones if `a` is less than or equal to `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for less than or equal.\n\nReturns a mask where each logical lane is true if `a` is less than or equal to `b`, and false if not."] fn simd_le_u8x16(self, a: u8x16, b: u8x16) -> mask8x16; - #[doc = "Compare two vectors element-wise for greater than or equal.\n\nReturns a mask where each element is all ones if `a` is greater than or equal to `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for greater than or equal.\n\nReturns a mask where each logical lane is true if `a` is greater than or equal to `b`, and false if not."] fn simd_ge_u8x16(self, a: u8x16, b: u8x16) -> mask8x16; - #[doc = "Compare two vectors element-wise for greater than.\n\nReturns a mask where each element is all ones if `a` is greater than `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for greater than.\n\nReturns a mask where each logical lane is true if `a` is greater than `b`, and false if not."] fn simd_gt_u8x16(self, a: u8x16, b: u8x16) -> mask8x16; #[doc = "Interleave the lower half elements of two vectors.\n\nFor vectors `[a0, a1, a2, a3]` and `[b0, b1, b2, b3]`, returns `[a0, b0, a1, b1]`.\n\n**Note:** This operation is only useful if you need to discard elements `a2, a3, b2, b3`.\n For fully interleaving two vectors prefer `interleave`,\n which is faster than `zip_low` followed by `zip_high` on some platforms."] fn zip_low_u8x16(self, a: u8x16, b: u8x16) -> u8x16; @@ -380,7 +378,7 @@ pub trait Simd: fn interleave_u8x16(self, a: u8x16, b: u8x16) -> (u8x16, u8x16); #[doc = "Deinterleave two vectors.\n\nThe first result contains all even-indexed elements from `a` followed by all even-indexed elements from `b`. The second result contains all odd-indexed elements from `a` followed by all odd-indexed elements from `b`.\n\nThe reverse of this operation is `interleave`.\n\nFor vectors `[a0, b0, a1, b1]` and `[a2, b2, a3, b3]`, returns `([a0, a1, a2, a3], [b0, b1, b2, b3])`."] fn deinterleave_u8x16(self, a: u8x16, b: u8x16) -> (u8x16, u8x16); - #[doc = "Select elements from b and c based on the mask operand a.\n\nThis operation's behavior is unspecified if each lane of a is not the all-zeroes or all-ones bit pattern. See the [`Select`] trait's documentation for more information."] + #[doc = "Select elements from b and c based on the mask operand a.\n\nThis operation's behavior is unspecified if a was constructed from signed integer lanes that are neither all-zeroes (integer value 0) nor all-ones (integer value -1). See the [`Select`] trait's documentation for more information."] fn select_u8x16(self, a: mask8x16, b: u8x16, c: u8x16) -> u8x16; #[doc = "Return the element-wise minimum of two vectors."] fn min_u8x16(self, a: u8x16, b: u8x16) -> u8x16; @@ -392,36 +390,12 @@ pub trait Simd: fn widen_u8x16(self, a: u8x16) -> u16x16; #[doc = "Reinterpret the bits of this vector as a vector of `u32` elements.\n\nThe total bit width is preserved; the number of elements changes accordingly."] fn reinterpret_u32_u8x16(self, a: u8x16) -> u32x4; - #[doc = "Create a SIMD vector with all elements set to the given value."] - fn splat_mask8x16(self, val: i8) -> mask8x16; - #[doc = "Create a SIMD vector from an array of the same length."] + #[doc = "Create a SIMD mask with all lanes set from the given boolean value."] + fn splat_mask8x16(self, val: bool) -> mask8x16; + #[doc = "Create a SIMD mask from signed integer mask lanes."] fn load_array_mask8x16(self, val: [i8; 16usize]) -> mask8x16; - #[doc = "Create a SIMD vector from an array of the same length."] - fn load_array_ref_mask8x16(self, val: &[i8; 16usize]) -> mask8x16; - #[doc = "Convert a SIMD vector to an array."] + #[doc = "Convert a SIMD mask to signed integer mask lanes."] fn as_array_mask8x16(self, a: mask8x16) -> [i8; 16usize]; - #[doc = "Project a reference to a SIMD vector to a reference to the equivalent array."] - fn as_array_ref_mask8x16(self, a: &mask8x16) -> &[i8; 16usize]; - #[doc = "Project a mutable reference to a SIMD vector to a mutable reference to the equivalent array."] - fn as_array_mut_mask8x16(self, a: &mut mask8x16) -> &mut [i8; 16usize]; - #[doc = "Store a SIMD vector into an array of the same length."] - fn store_array_mask8x16(self, a: mask8x16, dest: &mut [i8; 16usize]) -> (); - #[doc = "Reinterpret a vector of bytes as a SIMD vector of a given type, with the equivalent byte length."] - fn cvt_from_bytes_mask8x16(self, a: u8x16) -> mask8x16; - #[doc = "Reinterpret a SIMD vector as a vector of bytes, with the equivalent byte length."] - fn cvt_to_bytes_mask8x16(self, a: mask8x16) -> u8x16; - #[doc = "Concatenate `[self, rhs]` and extract `Self::N` elements starting at index `SHIFT`.\n\n`SHIFT` must be within [0, `Self::N`].\n\nThis can be used to implement a \"shift items\" operation by providing all zeroes as one operand. For a left shift, the right-hand side should be all zeroes. For a right shift by `M` items, the left-hand side should be all zeroes, and the shift amount will be `Self::N - M`.\n\nThis can also be used to rotate items within a vector by providing the same vector as both operands.\n\n```text\n\nslide::<1>([a b c d], [e f g h]) == [b c d e]\n\n```"] - fn slide_mask8x16( - self, - a: mask8x16, - b: mask8x16, - ) -> mask8x16; - #[doc = "Like `slide`, but operates independently on each 128-bit block."] - fn slide_within_blocks_mask8x16( - self, - a: mask8x16, - b: mask8x16, - ) -> mask8x16; #[doc = "Compute the logical AND of two masks."] fn and_mask8x16(self, a: mask8x16, b: mask8x16) -> mask8x16; #[doc = "Compute the logical OR of two masks."] @@ -430,22 +404,22 @@ pub trait Simd: fn xor_mask8x16(self, a: mask8x16, b: mask8x16) -> mask8x16; #[doc = "Compute the logical NOT of the mask."] fn not_mask8x16(self, a: mask8x16) -> mask8x16; - #[doc = "Select elements from `b` and `c` based on the mask operand `a`.\n\nThis operation's behavior is unspecified if each lane of a is not the all-zeroes or all-ones bit pattern. See the [`Select`] trait's documentation for more information."] + #[doc = "Select elements from `b` and `c` based on the mask operand `a`.\n\nThis operation's behavior is unspecified if a was constructed from signed integer lanes that are neither all-zeroes (integer value 0) nor all-ones (integer value -1). See the [`Select`] trait's documentation for more information."] fn select_mask8x16( self, a: mask8x16, b: mask8x16, c: mask8x16, ) -> mask8x16; - #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each element is all ones if the corresponding elements are equal, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each logical lane is true if the corresponding elements are equal, and false if not."] fn simd_eq_mask8x16(self, a: mask8x16, b: mask8x16) -> mask8x16; - #[doc = "Returns true if any elements in this mask are true (all ones).\n\nBehavior on mask elements that are not all zeroes or all ones is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent if mask elements are not all zeroes or all ones. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for mask elements that are not all zeroes or all ones. That behavior may not match the behavior of this operation."] + #[doc = "Returns true if any logical lanes in this mask are true.\n\nMasks may be converted to and from signed integer lane arrays for compatibility with older APIs. For those conversions, false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1).\n\nBehavior on masks constructed from any other integer bit pattern is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent for such non-canonical masks. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for non-canonical masks. That behavior may not match the behavior of this operation."] fn any_true_mask8x16(self, a: mask8x16) -> bool; - #[doc = "Returns true if all elements in this mask are true (all ones).\n\nBehavior on mask elements that are not all zeroes or all ones is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent if mask elements are not all zeroes or all ones. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for mask elements that are not all zeroes or all ones. That behavior may not match the behavior of this operation."] + #[doc = "Returns true if all logical lanes in this mask are true.\n\nMasks may be converted to and from signed integer lane arrays for compatibility with older APIs. For those conversions, false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1).\n\nBehavior on masks constructed from any other integer bit pattern is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent for such non-canonical masks. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for non-canonical masks. That behavior may not match the behavior of this operation."] fn all_true_mask8x16(self, a: mask8x16) -> bool; - #[doc = "Returns true if any elements in this mask are false (all zeroes).\n\nThis is logically equivalent to `!all_true`, but may be faster.\n\nBehavior on mask elements that are not all zeroes or all ones is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent if mask elements are not all zeroes or all ones. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for mask elements that are not all zeroes or all ones. That behavior may not match the behavior of this operation."] + #[doc = "Returns true if any logical lanes in this mask are false.\n\nThis is logically equivalent to `!all_true`, but may be faster.\n\nMasks may be converted to and from signed integer lane arrays for compatibility with older APIs. For those conversions, false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1).\n\nBehavior on masks constructed from any other integer bit pattern is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent for such non-canonical masks. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for non-canonical masks. That behavior may not match the behavior of this operation."] fn any_false_mask8x16(self, a: mask8x16) -> bool; - #[doc = "Returns true if all elements in this mask are false (all zeroes).\n\nThis is logically equivalent to `!any_true`, but may be faster.\n\nBehavior on mask elements that are not all zeroes or all ones is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent if mask elements are not all zeroes or all ones. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for mask elements that are not all zeroes or all ones. That behavior may not match the behavior of this operation."] + #[doc = "Returns true if all logical lanes in this mask are false.\n\nThis is logically equivalent to `!any_true`, but may be faster.\n\nMasks may be converted to and from signed integer lane arrays for compatibility with older APIs. For those conversions, false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1).\n\nBehavior on masks constructed from any other integer bit pattern is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent for such non-canonical masks. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for non-canonical masks. That behavior may not match the behavior of this operation."] fn all_false_mask8x16(self, a: mask8x16) -> bool; #[doc = "Combine two vectors into a single vector with twice the width.\n\n`a` provides the lower elements and `b` provides the upper elements."] fn combine_mask8x16(self, a: mask8x16, b: mask8x16) -> mask8x32; @@ -497,15 +471,15 @@ pub trait Simd: fn shr_i16x8(self, a: i16x8, shift: u32) -> i16x8; #[doc = "Shift each element right by the corresponding element in another vector.\n\nFor unsigned integers, zeros are shifted in on the left. For signed integers, the sign bit is replicated.\n\nThis operation is not implemented in hardware on all platforms. On WebAssembly, and on x86 platforms without AVX2, this will use a fallback scalar implementation."] fn shrv_i16x8(self, a: i16x8, b: i16x8) -> i16x8; - #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each element is all ones if the corresponding elements are equal, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each logical lane is true if the corresponding elements are equal, and false if not."] fn simd_eq_i16x8(self, a: i16x8, b: i16x8) -> mask16x8; - #[doc = "Compare two vectors element-wise for less than.\n\nReturns a mask where each element is all ones if `a` is less than `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for less than.\n\nReturns a mask where each logical lane is true if `a` is less than `b`, and false if not."] fn simd_lt_i16x8(self, a: i16x8, b: i16x8) -> mask16x8; - #[doc = "Compare two vectors element-wise for less than or equal.\n\nReturns a mask where each element is all ones if `a` is less than or equal to `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for less than or equal.\n\nReturns a mask where each logical lane is true if `a` is less than or equal to `b`, and false if not."] fn simd_le_i16x8(self, a: i16x8, b: i16x8) -> mask16x8; - #[doc = "Compare two vectors element-wise for greater than or equal.\n\nReturns a mask where each element is all ones if `a` is greater than or equal to `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for greater than or equal.\n\nReturns a mask where each logical lane is true if `a` is greater than or equal to `b`, and false if not."] fn simd_ge_i16x8(self, a: i16x8, b: i16x8) -> mask16x8; - #[doc = "Compare two vectors element-wise for greater than.\n\nReturns a mask where each element is all ones if `a` is greater than `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for greater than.\n\nReturns a mask where each logical lane is true if `a` is greater than `b`, and false if not."] fn simd_gt_i16x8(self, a: i16x8, b: i16x8) -> mask16x8; #[doc = "Interleave the lower half elements of two vectors.\n\nFor vectors `[a0, a1, a2, a3]` and `[b0, b1, b2, b3]`, returns `[a0, b0, a1, b1]`.\n\n**Note:** This operation is only useful if you need to discard elements `a2, a3, b2, b3`.\n For fully interleaving two vectors prefer `interleave`,\n which is faster than `zip_low` followed by `zip_high` on some platforms."] fn zip_low_i16x8(self, a: i16x8, b: i16x8) -> i16x8; @@ -519,7 +493,7 @@ pub trait Simd: fn interleave_i16x8(self, a: i16x8, b: i16x8) -> (i16x8, i16x8); #[doc = "Deinterleave two vectors.\n\nThe first result contains all even-indexed elements from `a` followed by all even-indexed elements from `b`. The second result contains all odd-indexed elements from `a` followed by all odd-indexed elements from `b`.\n\nThe reverse of this operation is `interleave`.\n\nFor vectors `[a0, b0, a1, b1]` and `[a2, b2, a3, b3]`, returns `([a0, a1, a2, a3], [b0, b1, b2, b3])`."] fn deinterleave_i16x8(self, a: i16x8, b: i16x8) -> (i16x8, i16x8); - #[doc = "Select elements from b and c based on the mask operand a.\n\nThis operation's behavior is unspecified if each lane of a is not the all-zeroes or all-ones bit pattern. See the [`Select`] trait's documentation for more information."] + #[doc = "Select elements from b and c based on the mask operand a.\n\nThis operation's behavior is unspecified if a was constructed from signed integer lanes that are neither all-zeroes (integer value 0) nor all-ones (integer value -1). See the [`Select`] trait's documentation for more information."] fn select_i16x8(self, a: mask16x8, b: i16x8, c: i16x8) -> i16x8; #[doc = "Return the element-wise minimum of two vectors."] fn min_i16x8(self, a: i16x8, b: i16x8) -> i16x8; @@ -581,15 +555,15 @@ pub trait Simd: fn shr_u16x8(self, a: u16x8, shift: u32) -> u16x8; #[doc = "Shift each element right by the corresponding element in another vector.\n\nFor unsigned integers, zeros are shifted in on the left. For signed integers, the sign bit is replicated.\n\nThis operation is not implemented in hardware on all platforms. On WebAssembly, and on x86 platforms without AVX2, this will use a fallback scalar implementation."] fn shrv_u16x8(self, a: u16x8, b: u16x8) -> u16x8; - #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each element is all ones if the corresponding elements are equal, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each logical lane is true if the corresponding elements are equal, and false if not."] fn simd_eq_u16x8(self, a: u16x8, b: u16x8) -> mask16x8; - #[doc = "Compare two vectors element-wise for less than.\n\nReturns a mask where each element is all ones if `a` is less than `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for less than.\n\nReturns a mask where each logical lane is true if `a` is less than `b`, and false if not."] fn simd_lt_u16x8(self, a: u16x8, b: u16x8) -> mask16x8; - #[doc = "Compare two vectors element-wise for less than or equal.\n\nReturns a mask where each element is all ones if `a` is less than or equal to `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for less than or equal.\n\nReturns a mask where each logical lane is true if `a` is less than or equal to `b`, and false if not."] fn simd_le_u16x8(self, a: u16x8, b: u16x8) -> mask16x8; - #[doc = "Compare two vectors element-wise for greater than or equal.\n\nReturns a mask where each element is all ones if `a` is greater than or equal to `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for greater than or equal.\n\nReturns a mask where each logical lane is true if `a` is greater than or equal to `b`, and false if not."] fn simd_ge_u16x8(self, a: u16x8, b: u16x8) -> mask16x8; - #[doc = "Compare two vectors element-wise for greater than.\n\nReturns a mask where each element is all ones if `a` is greater than `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for greater than.\n\nReturns a mask where each logical lane is true if `a` is greater than `b`, and false if not."] fn simd_gt_u16x8(self, a: u16x8, b: u16x8) -> mask16x8; #[doc = "Interleave the lower half elements of two vectors.\n\nFor vectors `[a0, a1, a2, a3]` and `[b0, b1, b2, b3]`, returns `[a0, b0, a1, b1]`.\n\n**Note:** This operation is only useful if you need to discard elements `a2, a3, b2, b3`.\n For fully interleaving two vectors prefer `interleave`,\n which is faster than `zip_low` followed by `zip_high` on some platforms."] fn zip_low_u16x8(self, a: u16x8, b: u16x8) -> u16x8; @@ -603,7 +577,7 @@ pub trait Simd: fn interleave_u16x8(self, a: u16x8, b: u16x8) -> (u16x8, u16x8); #[doc = "Deinterleave two vectors.\n\nThe first result contains all even-indexed elements from `a` followed by all even-indexed elements from `b`. The second result contains all odd-indexed elements from `a` followed by all odd-indexed elements from `b`.\n\nThe reverse of this operation is `interleave`.\n\nFor vectors `[a0, b0, a1, b1]` and `[a2, b2, a3, b3]`, returns `([a0, a1, a2, a3], [b0, b1, b2, b3])`."] fn deinterleave_u16x8(self, a: u16x8, b: u16x8) -> (u16x8, u16x8); - #[doc = "Select elements from b and c based on the mask operand a.\n\nThis operation's behavior is unspecified if each lane of a is not the all-zeroes or all-ones bit pattern. See the [`Select`] trait's documentation for more information."] + #[doc = "Select elements from b and c based on the mask operand a.\n\nThis operation's behavior is unspecified if a was constructed from signed integer lanes that are neither all-zeroes (integer value 0) nor all-ones (integer value -1). See the [`Select`] trait's documentation for more information."] fn select_u16x8(self, a: mask16x8, b: u16x8, c: u16x8) -> u16x8; #[doc = "Return the element-wise minimum of two vectors."] fn min_u16x8(self, a: u16x8, b: u16x8) -> u16x8; @@ -615,36 +589,12 @@ pub trait Simd: fn reinterpret_u8_u16x8(self, a: u16x8) -> u8x16; #[doc = "Reinterpret the bits of this vector as a vector of `u32` elements.\n\nThe total bit width is preserved; the number of elements changes accordingly."] fn reinterpret_u32_u16x8(self, a: u16x8) -> u32x4; - #[doc = "Create a SIMD vector with all elements set to the given value."] - fn splat_mask16x8(self, val: i16) -> mask16x8; - #[doc = "Create a SIMD vector from an array of the same length."] + #[doc = "Create a SIMD mask with all lanes set from the given boolean value."] + fn splat_mask16x8(self, val: bool) -> mask16x8; + #[doc = "Create a SIMD mask from signed integer mask lanes."] fn load_array_mask16x8(self, val: [i16; 8usize]) -> mask16x8; - #[doc = "Create a SIMD vector from an array of the same length."] - fn load_array_ref_mask16x8(self, val: &[i16; 8usize]) -> mask16x8; - #[doc = "Convert a SIMD vector to an array."] + #[doc = "Convert a SIMD mask to signed integer mask lanes."] fn as_array_mask16x8(self, a: mask16x8) -> [i16; 8usize]; - #[doc = "Project a reference to a SIMD vector to a reference to the equivalent array."] - fn as_array_ref_mask16x8(self, a: &mask16x8) -> &[i16; 8usize]; - #[doc = "Project a mutable reference to a SIMD vector to a mutable reference to the equivalent array."] - fn as_array_mut_mask16x8(self, a: &mut mask16x8) -> &mut [i16; 8usize]; - #[doc = "Store a SIMD vector into an array of the same length."] - fn store_array_mask16x8(self, a: mask16x8, dest: &mut [i16; 8usize]) -> (); - #[doc = "Reinterpret a vector of bytes as a SIMD vector of a given type, with the equivalent byte length."] - fn cvt_from_bytes_mask16x8(self, a: u8x16) -> mask16x8; - #[doc = "Reinterpret a SIMD vector as a vector of bytes, with the equivalent byte length."] - fn cvt_to_bytes_mask16x8(self, a: mask16x8) -> u8x16; - #[doc = "Concatenate `[self, rhs]` and extract `Self::N` elements starting at index `SHIFT`.\n\n`SHIFT` must be within [0, `Self::N`].\n\nThis can be used to implement a \"shift items\" operation by providing all zeroes as one operand. For a left shift, the right-hand side should be all zeroes. For a right shift by `M` items, the left-hand side should be all zeroes, and the shift amount will be `Self::N - M`.\n\nThis can also be used to rotate items within a vector by providing the same vector as both operands.\n\n```text\n\nslide::<1>([a b c d], [e f g h]) == [b c d e]\n\n```"] - fn slide_mask16x8( - self, - a: mask16x8, - b: mask16x8, - ) -> mask16x8; - #[doc = "Like `slide`, but operates independently on each 128-bit block."] - fn slide_within_blocks_mask16x8( - self, - a: mask16x8, - b: mask16x8, - ) -> mask16x8; #[doc = "Compute the logical AND of two masks."] fn and_mask16x8(self, a: mask16x8, b: mask16x8) -> mask16x8; #[doc = "Compute the logical OR of two masks."] @@ -653,22 +603,22 @@ pub trait Simd: fn xor_mask16x8(self, a: mask16x8, b: mask16x8) -> mask16x8; #[doc = "Compute the logical NOT of the mask."] fn not_mask16x8(self, a: mask16x8) -> mask16x8; - #[doc = "Select elements from `b` and `c` based on the mask operand `a`.\n\nThis operation's behavior is unspecified if each lane of a is not the all-zeroes or all-ones bit pattern. See the [`Select`] trait's documentation for more information."] + #[doc = "Select elements from `b` and `c` based on the mask operand `a`.\n\nThis operation's behavior is unspecified if a was constructed from signed integer lanes that are neither all-zeroes (integer value 0) nor all-ones (integer value -1). See the [`Select`] trait's documentation for more information."] fn select_mask16x8( self, a: mask16x8, b: mask16x8, c: mask16x8, ) -> mask16x8; - #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each element is all ones if the corresponding elements are equal, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each logical lane is true if the corresponding elements are equal, and false if not."] fn simd_eq_mask16x8(self, a: mask16x8, b: mask16x8) -> mask16x8; - #[doc = "Returns true if any elements in this mask are true (all ones).\n\nBehavior on mask elements that are not all zeroes or all ones is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent if mask elements are not all zeroes or all ones. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for mask elements that are not all zeroes or all ones. That behavior may not match the behavior of this operation."] + #[doc = "Returns true if any logical lanes in this mask are true.\n\nMasks may be converted to and from signed integer lane arrays for compatibility with older APIs. For those conversions, false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1).\n\nBehavior on masks constructed from any other integer bit pattern is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent for such non-canonical masks. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for non-canonical masks. That behavior may not match the behavior of this operation."] fn any_true_mask16x8(self, a: mask16x8) -> bool; - #[doc = "Returns true if all elements in this mask are true (all ones).\n\nBehavior on mask elements that are not all zeroes or all ones is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent if mask elements are not all zeroes or all ones. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for mask elements that are not all zeroes or all ones. That behavior may not match the behavior of this operation."] + #[doc = "Returns true if all logical lanes in this mask are true.\n\nMasks may be converted to and from signed integer lane arrays for compatibility with older APIs. For those conversions, false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1).\n\nBehavior on masks constructed from any other integer bit pattern is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent for such non-canonical masks. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for non-canonical masks. That behavior may not match the behavior of this operation."] fn all_true_mask16x8(self, a: mask16x8) -> bool; - #[doc = "Returns true if any elements in this mask are false (all zeroes).\n\nThis is logically equivalent to `!all_true`, but may be faster.\n\nBehavior on mask elements that are not all zeroes or all ones is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent if mask elements are not all zeroes or all ones. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for mask elements that are not all zeroes or all ones. That behavior may not match the behavior of this operation."] + #[doc = "Returns true if any logical lanes in this mask are false.\n\nThis is logically equivalent to `!all_true`, but may be faster.\n\nMasks may be converted to and from signed integer lane arrays for compatibility with older APIs. For those conversions, false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1).\n\nBehavior on masks constructed from any other integer bit pattern is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent for such non-canonical masks. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for non-canonical masks. That behavior may not match the behavior of this operation."] fn any_false_mask16x8(self, a: mask16x8) -> bool; - #[doc = "Returns true if all elements in this mask are false (all zeroes).\n\nThis is logically equivalent to `!any_true`, but may be faster.\n\nBehavior on mask elements that are not all zeroes or all ones is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent if mask elements are not all zeroes or all ones. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for mask elements that are not all zeroes or all ones. That behavior may not match the behavior of this operation."] + #[doc = "Returns true if all logical lanes in this mask are false.\n\nThis is logically equivalent to `!any_true`, but may be faster.\n\nMasks may be converted to and from signed integer lane arrays for compatibility with older APIs. For those conversions, false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1).\n\nBehavior on masks constructed from any other integer bit pattern is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent for such non-canonical masks. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for non-canonical masks. That behavior may not match the behavior of this operation."] fn all_false_mask16x8(self, a: mask16x8) -> bool; #[doc = "Combine two vectors into a single vector with twice the width.\n\n`a` provides the lower elements and `b` provides the upper elements."] fn combine_mask16x8(self, a: mask16x8, b: mask16x8) -> mask16x16; @@ -720,15 +670,15 @@ pub trait Simd: fn shr_i32x4(self, a: i32x4, shift: u32) -> i32x4; #[doc = "Shift each element right by the corresponding element in another vector.\n\nFor unsigned integers, zeros are shifted in on the left. For signed integers, the sign bit is replicated.\n\nThis operation is not implemented in hardware on all platforms. On WebAssembly, and on x86 platforms without AVX2, this will use a fallback scalar implementation."] fn shrv_i32x4(self, a: i32x4, b: i32x4) -> i32x4; - #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each element is all ones if the corresponding elements are equal, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each logical lane is true if the corresponding elements are equal, and false if not."] fn simd_eq_i32x4(self, a: i32x4, b: i32x4) -> mask32x4; - #[doc = "Compare two vectors element-wise for less than.\n\nReturns a mask where each element is all ones if `a` is less than `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for less than.\n\nReturns a mask where each logical lane is true if `a` is less than `b`, and false if not."] fn simd_lt_i32x4(self, a: i32x4, b: i32x4) -> mask32x4; - #[doc = "Compare two vectors element-wise for less than or equal.\n\nReturns a mask where each element is all ones if `a` is less than or equal to `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for less than or equal.\n\nReturns a mask where each logical lane is true if `a` is less than or equal to `b`, and false if not."] fn simd_le_i32x4(self, a: i32x4, b: i32x4) -> mask32x4; - #[doc = "Compare two vectors element-wise for greater than or equal.\n\nReturns a mask where each element is all ones if `a` is greater than or equal to `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for greater than or equal.\n\nReturns a mask where each logical lane is true if `a` is greater than or equal to `b`, and false if not."] fn simd_ge_i32x4(self, a: i32x4, b: i32x4) -> mask32x4; - #[doc = "Compare two vectors element-wise for greater than.\n\nReturns a mask where each element is all ones if `a` is greater than `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for greater than.\n\nReturns a mask where each logical lane is true if `a` is greater than `b`, and false if not."] fn simd_gt_i32x4(self, a: i32x4, b: i32x4) -> mask32x4; #[doc = "Interleave the lower half elements of two vectors.\n\nFor vectors `[a0, a1, a2, a3]` and `[b0, b1, b2, b3]`, returns `[a0, b0, a1, b1]`.\n\n**Note:** This operation is only useful if you need to discard elements `a2, a3, b2, b3`.\n For fully interleaving two vectors prefer `interleave`,\n which is faster than `zip_low` followed by `zip_high` on some platforms."] fn zip_low_i32x4(self, a: i32x4, b: i32x4) -> i32x4; @@ -742,7 +692,7 @@ pub trait Simd: fn interleave_i32x4(self, a: i32x4, b: i32x4) -> (i32x4, i32x4); #[doc = "Deinterleave two vectors.\n\nThe first result contains all even-indexed elements from `a` followed by all even-indexed elements from `b`. The second result contains all odd-indexed elements from `a` followed by all odd-indexed elements from `b`.\n\nThe reverse of this operation is `interleave`.\n\nFor vectors `[a0, b0, a1, b1]` and `[a2, b2, a3, b3]`, returns `([a0, a1, a2, a3], [b0, b1, b2, b3])`."] fn deinterleave_i32x4(self, a: i32x4, b: i32x4) -> (i32x4, i32x4); - #[doc = "Select elements from b and c based on the mask operand a.\n\nThis operation's behavior is unspecified if each lane of a is not the all-zeroes or all-ones bit pattern. See the [`Select`] trait's documentation for more information."] + #[doc = "Select elements from b and c based on the mask operand a.\n\nThis operation's behavior is unspecified if a was constructed from signed integer lanes that are neither all-zeroes (integer value 0) nor all-ones (integer value -1). See the [`Select`] trait's documentation for more information."] fn select_i32x4(self, a: mask32x4, b: i32x4, c: i32x4) -> i32x4; #[doc = "Return the element-wise minimum of two vectors."] fn min_i32x4(self, a: i32x4, b: i32x4) -> i32x4; @@ -806,15 +756,15 @@ pub trait Simd: fn shr_u32x4(self, a: u32x4, shift: u32) -> u32x4; #[doc = "Shift each element right by the corresponding element in another vector.\n\nFor unsigned integers, zeros are shifted in on the left. For signed integers, the sign bit is replicated.\n\nThis operation is not implemented in hardware on all platforms. On WebAssembly, and on x86 platforms without AVX2, this will use a fallback scalar implementation."] fn shrv_u32x4(self, a: u32x4, b: u32x4) -> u32x4; - #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each element is all ones if the corresponding elements are equal, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each logical lane is true if the corresponding elements are equal, and false if not."] fn simd_eq_u32x4(self, a: u32x4, b: u32x4) -> mask32x4; - #[doc = "Compare two vectors element-wise for less than.\n\nReturns a mask where each element is all ones if `a` is less than `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for less than.\n\nReturns a mask where each logical lane is true if `a` is less than `b`, and false if not."] fn simd_lt_u32x4(self, a: u32x4, b: u32x4) -> mask32x4; - #[doc = "Compare two vectors element-wise for less than or equal.\n\nReturns a mask where each element is all ones if `a` is less than or equal to `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for less than or equal.\n\nReturns a mask where each logical lane is true if `a` is less than or equal to `b`, and false if not."] fn simd_le_u32x4(self, a: u32x4, b: u32x4) -> mask32x4; - #[doc = "Compare two vectors element-wise for greater than or equal.\n\nReturns a mask where each element is all ones if `a` is greater than or equal to `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for greater than or equal.\n\nReturns a mask where each logical lane is true if `a` is greater than or equal to `b`, and false if not."] fn simd_ge_u32x4(self, a: u32x4, b: u32x4) -> mask32x4; - #[doc = "Compare two vectors element-wise for greater than.\n\nReturns a mask where each element is all ones if `a` is greater than `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for greater than.\n\nReturns a mask where each logical lane is true if `a` is greater than `b`, and false if not."] fn simd_gt_u32x4(self, a: u32x4, b: u32x4) -> mask32x4; #[doc = "Interleave the lower half elements of two vectors.\n\nFor vectors `[a0, a1, a2, a3]` and `[b0, b1, b2, b3]`, returns `[a0, b0, a1, b1]`.\n\n**Note:** This operation is only useful if you need to discard elements `a2, a3, b2, b3`.\n For fully interleaving two vectors prefer `interleave`,\n which is faster than `zip_low` followed by `zip_high` on some platforms."] fn zip_low_u32x4(self, a: u32x4, b: u32x4) -> u32x4; @@ -828,7 +778,7 @@ pub trait Simd: fn interleave_u32x4(self, a: u32x4, b: u32x4) -> (u32x4, u32x4); #[doc = "Deinterleave two vectors.\n\nThe first result contains all even-indexed elements from `a` followed by all even-indexed elements from `b`. The second result contains all odd-indexed elements from `a` followed by all odd-indexed elements from `b`.\n\nThe reverse of this operation is `interleave`.\n\nFor vectors `[a0, b0, a1, b1]` and `[a2, b2, a3, b3]`, returns `([a0, a1, a2, a3], [b0, b1, b2, b3])`."] fn deinterleave_u32x4(self, a: u32x4, b: u32x4) -> (u32x4, u32x4); - #[doc = "Select elements from b and c based on the mask operand a.\n\nThis operation's behavior is unspecified if each lane of a is not the all-zeroes or all-ones bit pattern. See the [`Select`] trait's documentation for more information."] + #[doc = "Select elements from b and c based on the mask operand a.\n\nThis operation's behavior is unspecified if a was constructed from signed integer lanes that are neither all-zeroes (integer value 0) nor all-ones (integer value -1). See the [`Select`] trait's documentation for more information."] fn select_u32x4(self, a: mask32x4, b: u32x4, c: u32x4) -> u32x4; #[doc = "Return the element-wise minimum of two vectors."] fn min_u32x4(self, a: u32x4, b: u32x4) -> u32x4; @@ -840,36 +790,12 @@ pub trait Simd: fn reinterpret_u8_u32x4(self, a: u32x4) -> u8x16; #[doc = "Convert each unsigned 32-bit integer element to a floating-point value.\n\nValues that cannot be exactly represented are rounded to the nearest representable value."] fn cvt_f32_u32x4(self, a: u32x4) -> f32x4; - #[doc = "Create a SIMD vector with all elements set to the given value."] - fn splat_mask32x4(self, val: i32) -> mask32x4; - #[doc = "Create a SIMD vector from an array of the same length."] + #[doc = "Create a SIMD mask with all lanes set from the given boolean value."] + fn splat_mask32x4(self, val: bool) -> mask32x4; + #[doc = "Create a SIMD mask from signed integer mask lanes."] fn load_array_mask32x4(self, val: [i32; 4usize]) -> mask32x4; - #[doc = "Create a SIMD vector from an array of the same length."] - fn load_array_ref_mask32x4(self, val: &[i32; 4usize]) -> mask32x4; - #[doc = "Convert a SIMD vector to an array."] + #[doc = "Convert a SIMD mask to signed integer mask lanes."] fn as_array_mask32x4(self, a: mask32x4) -> [i32; 4usize]; - #[doc = "Project a reference to a SIMD vector to a reference to the equivalent array."] - fn as_array_ref_mask32x4(self, a: &mask32x4) -> &[i32; 4usize]; - #[doc = "Project a mutable reference to a SIMD vector to a mutable reference to the equivalent array."] - fn as_array_mut_mask32x4(self, a: &mut mask32x4) -> &mut [i32; 4usize]; - #[doc = "Store a SIMD vector into an array of the same length."] - fn store_array_mask32x4(self, a: mask32x4, dest: &mut [i32; 4usize]) -> (); - #[doc = "Reinterpret a vector of bytes as a SIMD vector of a given type, with the equivalent byte length."] - fn cvt_from_bytes_mask32x4(self, a: u8x16) -> mask32x4; - #[doc = "Reinterpret a SIMD vector as a vector of bytes, with the equivalent byte length."] - fn cvt_to_bytes_mask32x4(self, a: mask32x4) -> u8x16; - #[doc = "Concatenate `[self, rhs]` and extract `Self::N` elements starting at index `SHIFT`.\n\n`SHIFT` must be within [0, `Self::N`].\n\nThis can be used to implement a \"shift items\" operation by providing all zeroes as one operand. For a left shift, the right-hand side should be all zeroes. For a right shift by `M` items, the left-hand side should be all zeroes, and the shift amount will be `Self::N - M`.\n\nThis can also be used to rotate items within a vector by providing the same vector as both operands.\n\n```text\n\nslide::<1>([a b c d], [e f g h]) == [b c d e]\n\n```"] - fn slide_mask32x4( - self, - a: mask32x4, - b: mask32x4, - ) -> mask32x4; - #[doc = "Like `slide`, but operates independently on each 128-bit block."] - fn slide_within_blocks_mask32x4( - self, - a: mask32x4, - b: mask32x4, - ) -> mask32x4; #[doc = "Compute the logical AND of two masks."] fn and_mask32x4(self, a: mask32x4, b: mask32x4) -> mask32x4; #[doc = "Compute the logical OR of two masks."] @@ -878,22 +804,22 @@ pub trait Simd: fn xor_mask32x4(self, a: mask32x4, b: mask32x4) -> mask32x4; #[doc = "Compute the logical NOT of the mask."] fn not_mask32x4(self, a: mask32x4) -> mask32x4; - #[doc = "Select elements from `b` and `c` based on the mask operand `a`.\n\nThis operation's behavior is unspecified if each lane of a is not the all-zeroes or all-ones bit pattern. See the [`Select`] trait's documentation for more information."] + #[doc = "Select elements from `b` and `c` based on the mask operand `a`.\n\nThis operation's behavior is unspecified if a was constructed from signed integer lanes that are neither all-zeroes (integer value 0) nor all-ones (integer value -1). See the [`Select`] trait's documentation for more information."] fn select_mask32x4( self, a: mask32x4, b: mask32x4, c: mask32x4, ) -> mask32x4; - #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each element is all ones if the corresponding elements are equal, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each logical lane is true if the corresponding elements are equal, and false if not."] fn simd_eq_mask32x4(self, a: mask32x4, b: mask32x4) -> mask32x4; - #[doc = "Returns true if any elements in this mask are true (all ones).\n\nBehavior on mask elements that are not all zeroes or all ones is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent if mask elements are not all zeroes or all ones. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for mask elements that are not all zeroes or all ones. That behavior may not match the behavior of this operation."] + #[doc = "Returns true if any logical lanes in this mask are true.\n\nMasks may be converted to and from signed integer lane arrays for compatibility with older APIs. For those conversions, false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1).\n\nBehavior on masks constructed from any other integer bit pattern is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent for such non-canonical masks. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for non-canonical masks. That behavior may not match the behavior of this operation."] fn any_true_mask32x4(self, a: mask32x4) -> bool; - #[doc = "Returns true if all elements in this mask are true (all ones).\n\nBehavior on mask elements that are not all zeroes or all ones is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent if mask elements are not all zeroes or all ones. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for mask elements that are not all zeroes or all ones. That behavior may not match the behavior of this operation."] + #[doc = "Returns true if all logical lanes in this mask are true.\n\nMasks may be converted to and from signed integer lane arrays for compatibility with older APIs. For those conversions, false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1).\n\nBehavior on masks constructed from any other integer bit pattern is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent for such non-canonical masks. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for non-canonical masks. That behavior may not match the behavior of this operation."] fn all_true_mask32x4(self, a: mask32x4) -> bool; - #[doc = "Returns true if any elements in this mask are false (all zeroes).\n\nThis is logically equivalent to `!all_true`, but may be faster.\n\nBehavior on mask elements that are not all zeroes or all ones is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent if mask elements are not all zeroes or all ones. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for mask elements that are not all zeroes or all ones. That behavior may not match the behavior of this operation."] + #[doc = "Returns true if any logical lanes in this mask are false.\n\nThis is logically equivalent to `!all_true`, but may be faster.\n\nMasks may be converted to and from signed integer lane arrays for compatibility with older APIs. For those conversions, false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1).\n\nBehavior on masks constructed from any other integer bit pattern is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent for such non-canonical masks. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for non-canonical masks. That behavior may not match the behavior of this operation."] fn any_false_mask32x4(self, a: mask32x4) -> bool; - #[doc = "Returns true if all elements in this mask are false (all zeroes).\n\nThis is logically equivalent to `!any_true`, but may be faster.\n\nBehavior on mask elements that are not all zeroes or all ones is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent if mask elements are not all zeroes or all ones. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for mask elements that are not all zeroes or all ones. That behavior may not match the behavior of this operation."] + #[doc = "Returns true if all logical lanes in this mask are false.\n\nThis is logically equivalent to `!any_true`, but may be faster.\n\nMasks may be converted to and from signed integer lane arrays for compatibility with older APIs. For those conversions, false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1).\n\nBehavior on masks constructed from any other integer bit pattern is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent for such non-canonical masks. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for non-canonical masks. That behavior may not match the behavior of this operation."] fn all_false_mask32x4(self, a: mask32x4) -> bool; #[doc = "Combine two vectors into a single vector with twice the width.\n\n`a` provides the lower elements and `b` provides the upper elements."] fn combine_mask32x4(self, a: mask32x4, b: mask32x4) -> mask32x8; @@ -939,15 +865,15 @@ pub trait Simd: fn div_f64x2(self, a: f64x2, b: f64x2) -> f64x2; #[doc = "Return a vector with the magnitude of `a` and the sign of `b` for each element.\n\nThis operation copies the sign bit, so if an input element is NaN, the output element will be a NaN with the same payload and a copied sign bit."] fn copysign_f64x2(self, a: f64x2, b: f64x2) -> f64x2; - #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each element is all ones if the corresponding elements are equal, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each logical lane is true if the corresponding elements are equal, and false if not."] fn simd_eq_f64x2(self, a: f64x2, b: f64x2) -> mask64x2; - #[doc = "Compare two vectors element-wise for less than.\n\nReturns a mask where each element is all ones if `a` is less than `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for less than.\n\nReturns a mask where each logical lane is true if `a` is less than `b`, and false if not."] fn simd_lt_f64x2(self, a: f64x2, b: f64x2) -> mask64x2; - #[doc = "Compare two vectors element-wise for less than or equal.\n\nReturns a mask where each element is all ones if `a` is less than or equal to `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for less than or equal.\n\nReturns a mask where each logical lane is true if `a` is less than or equal to `b`, and false if not."] fn simd_le_f64x2(self, a: f64x2, b: f64x2) -> mask64x2; - #[doc = "Compare two vectors element-wise for greater than or equal.\n\nReturns a mask where each element is all ones if `a` is greater than or equal to `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for greater than or equal.\n\nReturns a mask where each logical lane is true if `a` is greater than or equal to `b`, and false if not."] fn simd_ge_f64x2(self, a: f64x2, b: f64x2) -> mask64x2; - #[doc = "Compare two vectors element-wise for greater than.\n\nReturns a mask where each element is all ones if `a` is greater than `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for greater than.\n\nReturns a mask where each logical lane is true if `a` is greater than `b`, and false if not."] fn simd_gt_f64x2(self, a: f64x2, b: f64x2) -> mask64x2; #[doc = "Interleave the lower half elements of two vectors.\n\nFor vectors `[a0, a1, a2, a3]` and `[b0, b1, b2, b3]`, returns `[a0, b0, a1, b1]`.\n\n**Note:** This operation is only useful if you need to discard elements `a2, a3, b2, b3`.\n For fully interleaving two vectors prefer `interleave`,\n which is faster than `zip_low` followed by `zip_high` on some platforms."] fn zip_low_f64x2(self, a: f64x2, b: f64x2) -> f64x2; @@ -983,42 +909,18 @@ pub trait Simd: fn fract_f64x2(self, a: f64x2) -> f64x2; #[doc = "Return the integer part of each element, rounding towards zero."] fn trunc_f64x2(self, a: f64x2) -> f64x2; - #[doc = "Select elements from b and c based on the mask operand a.\n\nThis operation's behavior is unspecified if each lane of a is not the all-zeroes or all-ones bit pattern. See the [`Select`] trait's documentation for more information."] + #[doc = "Select elements from b and c based on the mask operand a.\n\nThis operation's behavior is unspecified if a was constructed from signed integer lanes that are neither all-zeroes (integer value 0) nor all-ones (integer value -1). See the [`Select`] trait's documentation for more information."] fn select_f64x2(self, a: mask64x2, b: f64x2, c: f64x2) -> f64x2; #[doc = "Combine two vectors into a single vector with twice the width.\n\n`a` provides the lower elements and `b` provides the upper elements."] fn combine_f64x2(self, a: f64x2, b: f64x2) -> f64x4; #[doc = "Reinterpret the bits of this vector as a vector of `f32` elements.\n\nThe number of elements in the result is twice that of the input."] fn reinterpret_f32_f64x2(self, a: f64x2) -> f32x4; - #[doc = "Create a SIMD vector with all elements set to the given value."] - fn splat_mask64x2(self, val: i64) -> mask64x2; - #[doc = "Create a SIMD vector from an array of the same length."] + #[doc = "Create a SIMD mask with all lanes set from the given boolean value."] + fn splat_mask64x2(self, val: bool) -> mask64x2; + #[doc = "Create a SIMD mask from signed integer mask lanes."] fn load_array_mask64x2(self, val: [i64; 2usize]) -> mask64x2; - #[doc = "Create a SIMD vector from an array of the same length."] - fn load_array_ref_mask64x2(self, val: &[i64; 2usize]) -> mask64x2; - #[doc = "Convert a SIMD vector to an array."] + #[doc = "Convert a SIMD mask to signed integer mask lanes."] fn as_array_mask64x2(self, a: mask64x2) -> [i64; 2usize]; - #[doc = "Project a reference to a SIMD vector to a reference to the equivalent array."] - fn as_array_ref_mask64x2(self, a: &mask64x2) -> &[i64; 2usize]; - #[doc = "Project a mutable reference to a SIMD vector to a mutable reference to the equivalent array."] - fn as_array_mut_mask64x2(self, a: &mut mask64x2) -> &mut [i64; 2usize]; - #[doc = "Store a SIMD vector into an array of the same length."] - fn store_array_mask64x2(self, a: mask64x2, dest: &mut [i64; 2usize]) -> (); - #[doc = "Reinterpret a vector of bytes as a SIMD vector of a given type, with the equivalent byte length."] - fn cvt_from_bytes_mask64x2(self, a: u8x16) -> mask64x2; - #[doc = "Reinterpret a SIMD vector as a vector of bytes, with the equivalent byte length."] - fn cvt_to_bytes_mask64x2(self, a: mask64x2) -> u8x16; - #[doc = "Concatenate `[self, rhs]` and extract `Self::N` elements starting at index `SHIFT`.\n\n`SHIFT` must be within [0, `Self::N`].\n\nThis can be used to implement a \"shift items\" operation by providing all zeroes as one operand. For a left shift, the right-hand side should be all zeroes. For a right shift by `M` items, the left-hand side should be all zeroes, and the shift amount will be `Self::N - M`.\n\nThis can also be used to rotate items within a vector by providing the same vector as both operands.\n\n```text\n\nslide::<1>([a b c d], [e f g h]) == [b c d e]\n\n```"] - fn slide_mask64x2( - self, - a: mask64x2, - b: mask64x2, - ) -> mask64x2; - #[doc = "Like `slide`, but operates independently on each 128-bit block."] - fn slide_within_blocks_mask64x2( - self, - a: mask64x2, - b: mask64x2, - ) -> mask64x2; #[doc = "Compute the logical AND of two masks."] fn and_mask64x2(self, a: mask64x2, b: mask64x2) -> mask64x2; #[doc = "Compute the logical OR of two masks."] @@ -1027,22 +929,22 @@ pub trait Simd: fn xor_mask64x2(self, a: mask64x2, b: mask64x2) -> mask64x2; #[doc = "Compute the logical NOT of the mask."] fn not_mask64x2(self, a: mask64x2) -> mask64x2; - #[doc = "Select elements from `b` and `c` based on the mask operand `a`.\n\nThis operation's behavior is unspecified if each lane of a is not the all-zeroes or all-ones bit pattern. See the [`Select`] trait's documentation for more information."] + #[doc = "Select elements from `b` and `c` based on the mask operand `a`.\n\nThis operation's behavior is unspecified if a was constructed from signed integer lanes that are neither all-zeroes (integer value 0) nor all-ones (integer value -1). See the [`Select`] trait's documentation for more information."] fn select_mask64x2( self, a: mask64x2, b: mask64x2, c: mask64x2, ) -> mask64x2; - #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each element is all ones if the corresponding elements are equal, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each logical lane is true if the corresponding elements are equal, and false if not."] fn simd_eq_mask64x2(self, a: mask64x2, b: mask64x2) -> mask64x2; - #[doc = "Returns true if any elements in this mask are true (all ones).\n\nBehavior on mask elements that are not all zeroes or all ones is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent if mask elements are not all zeroes or all ones. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for mask elements that are not all zeroes or all ones. That behavior may not match the behavior of this operation."] + #[doc = "Returns true if any logical lanes in this mask are true.\n\nMasks may be converted to and from signed integer lane arrays for compatibility with older APIs. For those conversions, false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1).\n\nBehavior on masks constructed from any other integer bit pattern is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent for such non-canonical masks. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for non-canonical masks. That behavior may not match the behavior of this operation."] fn any_true_mask64x2(self, a: mask64x2) -> bool; - #[doc = "Returns true if all elements in this mask are true (all ones).\n\nBehavior on mask elements that are not all zeroes or all ones is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent if mask elements are not all zeroes or all ones. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for mask elements that are not all zeroes or all ones. That behavior may not match the behavior of this operation."] + #[doc = "Returns true if all logical lanes in this mask are true.\n\nMasks may be converted to and from signed integer lane arrays for compatibility with older APIs. For those conversions, false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1).\n\nBehavior on masks constructed from any other integer bit pattern is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent for such non-canonical masks. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for non-canonical masks. That behavior may not match the behavior of this operation."] fn all_true_mask64x2(self, a: mask64x2) -> bool; - #[doc = "Returns true if any elements in this mask are false (all zeroes).\n\nThis is logically equivalent to `!all_true`, but may be faster.\n\nBehavior on mask elements that are not all zeroes or all ones is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent if mask elements are not all zeroes or all ones. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for mask elements that are not all zeroes or all ones. That behavior may not match the behavior of this operation."] + #[doc = "Returns true if any logical lanes in this mask are false.\n\nThis is logically equivalent to `!all_true`, but may be faster.\n\nMasks may be converted to and from signed integer lane arrays for compatibility with older APIs. For those conversions, false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1).\n\nBehavior on masks constructed from any other integer bit pattern is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent for such non-canonical masks. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for non-canonical masks. That behavior may not match the behavior of this operation."] fn any_false_mask64x2(self, a: mask64x2) -> bool; - #[doc = "Returns true if all elements in this mask are false (all zeroes).\n\nThis is logically equivalent to `!any_true`, but may be faster.\n\nBehavior on mask elements that are not all zeroes or all ones is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent if mask elements are not all zeroes or all ones. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for mask elements that are not all zeroes or all ones. That behavior may not match the behavior of this operation."] + #[doc = "Returns true if all logical lanes in this mask are false.\n\nThis is logically equivalent to `!any_true`, but may be faster.\n\nMasks may be converted to and from signed integer lane arrays for compatibility with older APIs. For those conversions, false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1).\n\nBehavior on masks constructed from any other integer bit pattern is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent for such non-canonical masks. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for non-canonical masks. That behavior may not match the behavior of this operation."] fn all_false_mask64x2(self, a: mask64x2) -> bool; #[doc = "Combine two vectors into a single vector with twice the width.\n\n`a` provides the lower elements and `b` provides the upper elements."] fn combine_mask64x2(self, a: mask64x2, b: mask64x2) -> mask64x4; @@ -1088,15 +990,15 @@ pub trait Simd: fn div_f32x8(self, a: f32x8, b: f32x8) -> f32x8; #[doc = "Return a vector with the magnitude of `a` and the sign of `b` for each element.\n\nThis operation copies the sign bit, so if an input element is NaN, the output element will be a NaN with the same payload and a copied sign bit."] fn copysign_f32x8(self, a: f32x8, b: f32x8) -> f32x8; - #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each element is all ones if the corresponding elements are equal, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each logical lane is true if the corresponding elements are equal, and false if not."] fn simd_eq_f32x8(self, a: f32x8, b: f32x8) -> mask32x8; - #[doc = "Compare two vectors element-wise for less than.\n\nReturns a mask where each element is all ones if `a` is less than `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for less than.\n\nReturns a mask where each logical lane is true if `a` is less than `b`, and false if not."] fn simd_lt_f32x8(self, a: f32x8, b: f32x8) -> mask32x8; - #[doc = "Compare two vectors element-wise for less than or equal.\n\nReturns a mask where each element is all ones if `a` is less than or equal to `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for less than or equal.\n\nReturns a mask where each logical lane is true if `a` is less than or equal to `b`, and false if not."] fn simd_le_f32x8(self, a: f32x8, b: f32x8) -> mask32x8; - #[doc = "Compare two vectors element-wise for greater than or equal.\n\nReturns a mask where each element is all ones if `a` is greater than or equal to `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for greater than or equal.\n\nReturns a mask where each logical lane is true if `a` is greater than or equal to `b`, and false if not."] fn simd_ge_f32x8(self, a: f32x8, b: f32x8) -> mask32x8; - #[doc = "Compare two vectors element-wise for greater than.\n\nReturns a mask where each element is all ones if `a` is greater than `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for greater than.\n\nReturns a mask where each logical lane is true if `a` is greater than `b`, and false if not."] fn simd_gt_f32x8(self, a: f32x8, b: f32x8) -> mask32x8; #[doc = "Interleave the lower half elements of two vectors.\n\nFor vectors `[a0, a1, a2, a3]` and `[b0, b1, b2, b3]`, returns `[a0, b0, a1, b1]`.\n\n**Note:** This operation is only useful if you need to discard elements `a2, a3, b2, b3`.\n For fully interleaving two vectors prefer `interleave`,\n which is faster than `zip_low` followed by `zip_high` on some platforms."] fn zip_low_f32x8(self, a: f32x8, b: f32x8) -> f32x8; @@ -1132,7 +1034,7 @@ pub trait Simd: fn fract_f32x8(self, a: f32x8) -> f32x8; #[doc = "Return the integer part of each element, rounding towards zero."] fn trunc_f32x8(self, a: f32x8) -> f32x8; - #[doc = "Select elements from b and c based on the mask operand a.\n\nThis operation's behavior is unspecified if each lane of a is not the all-zeroes or all-ones bit pattern. See the [`Select`] trait's documentation for more information."] + #[doc = "Select elements from b and c based on the mask operand a.\n\nThis operation's behavior is unspecified if a was constructed from signed integer lanes that are neither all-zeroes (integer value 0) nor all-ones (integer value -1). See the [`Select`] trait's documentation for more information."] fn select_f32x8(self, a: mask32x8, b: f32x8, c: f32x8) -> f32x8; #[doc = "Combine two vectors into a single vector with twice the width.\n\n`a` provides the lower elements and `b` provides the upper elements."] fn combine_f32x8(self, a: f32x8, b: f32x8) -> f32x16; @@ -1202,15 +1104,15 @@ pub trait Simd: fn shr_i8x32(self, a: i8x32, shift: u32) -> i8x32; #[doc = "Shift each element right by the corresponding element in another vector.\n\nFor unsigned integers, zeros are shifted in on the left. For signed integers, the sign bit is replicated.\n\nThis operation is not implemented in hardware on all platforms. On WebAssembly, and on x86 platforms without AVX2, this will use a fallback scalar implementation."] fn shrv_i8x32(self, a: i8x32, b: i8x32) -> i8x32; - #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each element is all ones if the corresponding elements are equal, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each logical lane is true if the corresponding elements are equal, and false if not."] fn simd_eq_i8x32(self, a: i8x32, b: i8x32) -> mask8x32; - #[doc = "Compare two vectors element-wise for less than.\n\nReturns a mask where each element is all ones if `a` is less than `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for less than.\n\nReturns a mask where each logical lane is true if `a` is less than `b`, and false if not."] fn simd_lt_i8x32(self, a: i8x32, b: i8x32) -> mask8x32; - #[doc = "Compare two vectors element-wise for less than or equal.\n\nReturns a mask where each element is all ones if `a` is less than or equal to `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for less than or equal.\n\nReturns a mask where each logical lane is true if `a` is less than or equal to `b`, and false if not."] fn simd_le_i8x32(self, a: i8x32, b: i8x32) -> mask8x32; - #[doc = "Compare two vectors element-wise for greater than or equal.\n\nReturns a mask where each element is all ones if `a` is greater than or equal to `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for greater than or equal.\n\nReturns a mask where each logical lane is true if `a` is greater than or equal to `b`, and false if not."] fn simd_ge_i8x32(self, a: i8x32, b: i8x32) -> mask8x32; - #[doc = "Compare two vectors element-wise for greater than.\n\nReturns a mask where each element is all ones if `a` is greater than `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for greater than.\n\nReturns a mask where each logical lane is true if `a` is greater than `b`, and false if not."] fn simd_gt_i8x32(self, a: i8x32, b: i8x32) -> mask8x32; #[doc = "Interleave the lower half elements of two vectors.\n\nFor vectors `[a0, a1, a2, a3]` and `[b0, b1, b2, b3]`, returns `[a0, b0, a1, b1]`.\n\n**Note:** This operation is only useful if you need to discard elements `a2, a3, b2, b3`.\n For fully interleaving two vectors prefer `interleave`,\n which is faster than `zip_low` followed by `zip_high` on some platforms."] fn zip_low_i8x32(self, a: i8x32, b: i8x32) -> i8x32; @@ -1224,7 +1126,7 @@ pub trait Simd: fn interleave_i8x32(self, a: i8x32, b: i8x32) -> (i8x32, i8x32); #[doc = "Deinterleave two vectors.\n\nThe first result contains all even-indexed elements from `a` followed by all even-indexed elements from `b`. The second result contains all odd-indexed elements from `a` followed by all odd-indexed elements from `b`.\n\nThe reverse of this operation is `interleave`.\n\nFor vectors `[a0, b0, a1, b1]` and `[a2, b2, a3, b3]`, returns `([a0, a1, a2, a3], [b0, b1, b2, b3])`."] fn deinterleave_i8x32(self, a: i8x32, b: i8x32) -> (i8x32, i8x32); - #[doc = "Select elements from b and c based on the mask operand a.\n\nThis operation's behavior is unspecified if each lane of a is not the all-zeroes or all-ones bit pattern. See the [`Select`] trait's documentation for more information."] + #[doc = "Select elements from b and c based on the mask operand a.\n\nThis operation's behavior is unspecified if a was constructed from signed integer lanes that are neither all-zeroes (integer value 0) nor all-ones (integer value -1). See the [`Select`] trait's documentation for more information."] fn select_i8x32(self, a: mask8x32, b: i8x32, c: i8x32) -> i8x32; #[doc = "Return the element-wise minimum of two vectors."] fn min_i8x32(self, a: i8x32, b: i8x32) -> i8x32; @@ -1288,15 +1190,15 @@ pub trait Simd: fn shr_u8x32(self, a: u8x32, shift: u32) -> u8x32; #[doc = "Shift each element right by the corresponding element in another vector.\n\nFor unsigned integers, zeros are shifted in on the left. For signed integers, the sign bit is replicated.\n\nThis operation is not implemented in hardware on all platforms. On WebAssembly, and on x86 platforms without AVX2, this will use a fallback scalar implementation."] fn shrv_u8x32(self, a: u8x32, b: u8x32) -> u8x32; - #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each element is all ones if the corresponding elements are equal, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each logical lane is true if the corresponding elements are equal, and false if not."] fn simd_eq_u8x32(self, a: u8x32, b: u8x32) -> mask8x32; - #[doc = "Compare two vectors element-wise for less than.\n\nReturns a mask where each element is all ones if `a` is less than `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for less than.\n\nReturns a mask where each logical lane is true if `a` is less than `b`, and false if not."] fn simd_lt_u8x32(self, a: u8x32, b: u8x32) -> mask8x32; - #[doc = "Compare two vectors element-wise for less than or equal.\n\nReturns a mask where each element is all ones if `a` is less than or equal to `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for less than or equal.\n\nReturns a mask where each logical lane is true if `a` is less than or equal to `b`, and false if not."] fn simd_le_u8x32(self, a: u8x32, b: u8x32) -> mask8x32; - #[doc = "Compare two vectors element-wise for greater than or equal.\n\nReturns a mask where each element is all ones if `a` is greater than or equal to `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for greater than or equal.\n\nReturns a mask where each logical lane is true if `a` is greater than or equal to `b`, and false if not."] fn simd_ge_u8x32(self, a: u8x32, b: u8x32) -> mask8x32; - #[doc = "Compare two vectors element-wise for greater than.\n\nReturns a mask where each element is all ones if `a` is greater than `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for greater than.\n\nReturns a mask where each logical lane is true if `a` is greater than `b`, and false if not."] fn simd_gt_u8x32(self, a: u8x32, b: u8x32) -> mask8x32; #[doc = "Interleave the lower half elements of two vectors.\n\nFor vectors `[a0, a1, a2, a3]` and `[b0, b1, b2, b3]`, returns `[a0, b0, a1, b1]`.\n\n**Note:** This operation is only useful if you need to discard elements `a2, a3, b2, b3`.\n For fully interleaving two vectors prefer `interleave`,\n which is faster than `zip_low` followed by `zip_high` on some platforms."] fn zip_low_u8x32(self, a: u8x32, b: u8x32) -> u8x32; @@ -1310,7 +1212,7 @@ pub trait Simd: fn interleave_u8x32(self, a: u8x32, b: u8x32) -> (u8x32, u8x32); #[doc = "Deinterleave two vectors.\n\nThe first result contains all even-indexed elements from `a` followed by all even-indexed elements from `b`. The second result contains all odd-indexed elements from `a` followed by all odd-indexed elements from `b`.\n\nThe reverse of this operation is `interleave`.\n\nFor vectors `[a0, b0, a1, b1]` and `[a2, b2, a3, b3]`, returns `([a0, a1, a2, a3], [b0, b1, b2, b3])`."] fn deinterleave_u8x32(self, a: u8x32, b: u8x32) -> (u8x32, u8x32); - #[doc = "Select elements from b and c based on the mask operand a.\n\nThis operation's behavior is unspecified if each lane of a is not the all-zeroes or all-ones bit pattern. See the [`Select`] trait's documentation for more information."] + #[doc = "Select elements from b and c based on the mask operand a.\n\nThis operation's behavior is unspecified if a was constructed from signed integer lanes that are neither all-zeroes (integer value 0) nor all-ones (integer value -1). See the [`Select`] trait's documentation for more information."] fn select_u8x32(self, a: mask8x32, b: u8x32, c: u8x32) -> u8x32; #[doc = "Return the element-wise minimum of two vectors."] fn min_u8x32(self, a: u8x32, b: u8x32) -> u8x32; @@ -1324,36 +1226,12 @@ pub trait Simd: fn widen_u8x32(self, a: u8x32) -> u16x32; #[doc = "Reinterpret the bits of this vector as a vector of `u32` elements.\n\nThe total bit width is preserved; the number of elements changes accordingly."] fn reinterpret_u32_u8x32(self, a: u8x32) -> u32x8; - #[doc = "Create a SIMD vector with all elements set to the given value."] - fn splat_mask8x32(self, val: i8) -> mask8x32; - #[doc = "Create a SIMD vector from an array of the same length."] + #[doc = "Create a SIMD mask with all lanes set from the given boolean value."] + fn splat_mask8x32(self, val: bool) -> mask8x32; + #[doc = "Create a SIMD mask from signed integer mask lanes."] fn load_array_mask8x32(self, val: [i8; 32usize]) -> mask8x32; - #[doc = "Create a SIMD vector from an array of the same length."] - fn load_array_ref_mask8x32(self, val: &[i8; 32usize]) -> mask8x32; - #[doc = "Convert a SIMD vector to an array."] + #[doc = "Convert a SIMD mask to signed integer mask lanes."] fn as_array_mask8x32(self, a: mask8x32) -> [i8; 32usize]; - #[doc = "Project a reference to a SIMD vector to a reference to the equivalent array."] - fn as_array_ref_mask8x32(self, a: &mask8x32) -> &[i8; 32usize]; - #[doc = "Project a mutable reference to a SIMD vector to a mutable reference to the equivalent array."] - fn as_array_mut_mask8x32(self, a: &mut mask8x32) -> &mut [i8; 32usize]; - #[doc = "Store a SIMD vector into an array of the same length."] - fn store_array_mask8x32(self, a: mask8x32, dest: &mut [i8; 32usize]) -> (); - #[doc = "Reinterpret a vector of bytes as a SIMD vector of a given type, with the equivalent byte length."] - fn cvt_from_bytes_mask8x32(self, a: u8x32) -> mask8x32; - #[doc = "Reinterpret a SIMD vector as a vector of bytes, with the equivalent byte length."] - fn cvt_to_bytes_mask8x32(self, a: mask8x32) -> u8x32; - #[doc = "Concatenate `[self, rhs]` and extract `Self::N` elements starting at index `SHIFT`.\n\n`SHIFT` must be within [0, `Self::N`].\n\nThis can be used to implement a \"shift items\" operation by providing all zeroes as one operand. For a left shift, the right-hand side should be all zeroes. For a right shift by `M` items, the left-hand side should be all zeroes, and the shift amount will be `Self::N - M`.\n\nThis can also be used to rotate items within a vector by providing the same vector as both operands.\n\n```text\n\nslide::<1>([a b c d], [e f g h]) == [b c d e]\n\n```"] - fn slide_mask8x32( - self, - a: mask8x32, - b: mask8x32, - ) -> mask8x32; - #[doc = "Like `slide`, but operates independently on each 128-bit block."] - fn slide_within_blocks_mask8x32( - self, - a: mask8x32, - b: mask8x32, - ) -> mask8x32; #[doc = "Compute the logical AND of two masks."] fn and_mask8x32(self, a: mask8x32, b: mask8x32) -> mask8x32; #[doc = "Compute the logical OR of two masks."] @@ -1362,22 +1240,22 @@ pub trait Simd: fn xor_mask8x32(self, a: mask8x32, b: mask8x32) -> mask8x32; #[doc = "Compute the logical NOT of the mask."] fn not_mask8x32(self, a: mask8x32) -> mask8x32; - #[doc = "Select elements from `b` and `c` based on the mask operand `a`.\n\nThis operation's behavior is unspecified if each lane of a is not the all-zeroes or all-ones bit pattern. See the [`Select`] trait's documentation for more information."] + #[doc = "Select elements from `b` and `c` based on the mask operand `a`.\n\nThis operation's behavior is unspecified if a was constructed from signed integer lanes that are neither all-zeroes (integer value 0) nor all-ones (integer value -1). See the [`Select`] trait's documentation for more information."] fn select_mask8x32( self, a: mask8x32, b: mask8x32, c: mask8x32, ) -> mask8x32; - #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each element is all ones if the corresponding elements are equal, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each logical lane is true if the corresponding elements are equal, and false if not."] fn simd_eq_mask8x32(self, a: mask8x32, b: mask8x32) -> mask8x32; - #[doc = "Returns true if any elements in this mask are true (all ones).\n\nBehavior on mask elements that are not all zeroes or all ones is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent if mask elements are not all zeroes or all ones. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for mask elements that are not all zeroes or all ones. That behavior may not match the behavior of this operation."] + #[doc = "Returns true if any logical lanes in this mask are true.\n\nMasks may be converted to and from signed integer lane arrays for compatibility with older APIs. For those conversions, false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1).\n\nBehavior on masks constructed from any other integer bit pattern is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent for such non-canonical masks. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for non-canonical masks. That behavior may not match the behavior of this operation."] fn any_true_mask8x32(self, a: mask8x32) -> bool; - #[doc = "Returns true if all elements in this mask are true (all ones).\n\nBehavior on mask elements that are not all zeroes or all ones is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent if mask elements are not all zeroes or all ones. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for mask elements that are not all zeroes or all ones. That behavior may not match the behavior of this operation."] + #[doc = "Returns true if all logical lanes in this mask are true.\n\nMasks may be converted to and from signed integer lane arrays for compatibility with older APIs. For those conversions, false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1).\n\nBehavior on masks constructed from any other integer bit pattern is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent for such non-canonical masks. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for non-canonical masks. That behavior may not match the behavior of this operation."] fn all_true_mask8x32(self, a: mask8x32) -> bool; - #[doc = "Returns true if any elements in this mask are false (all zeroes).\n\nThis is logically equivalent to `!all_true`, but may be faster.\n\nBehavior on mask elements that are not all zeroes or all ones is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent if mask elements are not all zeroes or all ones. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for mask elements that are not all zeroes or all ones. That behavior may not match the behavior of this operation."] + #[doc = "Returns true if any logical lanes in this mask are false.\n\nThis is logically equivalent to `!all_true`, but may be faster.\n\nMasks may be converted to and from signed integer lane arrays for compatibility with older APIs. For those conversions, false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1).\n\nBehavior on masks constructed from any other integer bit pattern is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent for such non-canonical masks. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for non-canonical masks. That behavior may not match the behavior of this operation."] fn any_false_mask8x32(self, a: mask8x32) -> bool; - #[doc = "Returns true if all elements in this mask are false (all zeroes).\n\nThis is logically equivalent to `!any_true`, but may be faster.\n\nBehavior on mask elements that are not all zeroes or all ones is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent if mask elements are not all zeroes or all ones. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for mask elements that are not all zeroes or all ones. That behavior may not match the behavior of this operation."] + #[doc = "Returns true if all logical lanes in this mask are false.\n\nThis is logically equivalent to `!any_true`, but may be faster.\n\nMasks may be converted to and from signed integer lane arrays for compatibility with older APIs. For those conversions, false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1).\n\nBehavior on masks constructed from any other integer bit pattern is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent for such non-canonical masks. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for non-canonical masks. That behavior may not match the behavior of this operation."] fn all_false_mask8x32(self, a: mask8x32) -> bool; #[doc = "Combine two vectors into a single vector with twice the width.\n\n`a` provides the lower elements and `b` provides the upper elements."] fn combine_mask8x32(self, a: mask8x32, b: mask8x32) -> mask8x64; @@ -1431,15 +1309,15 @@ pub trait Simd: fn shr_i16x16(self, a: i16x16, shift: u32) -> i16x16; #[doc = "Shift each element right by the corresponding element in another vector.\n\nFor unsigned integers, zeros are shifted in on the left. For signed integers, the sign bit is replicated.\n\nThis operation is not implemented in hardware on all platforms. On WebAssembly, and on x86 platforms without AVX2, this will use a fallback scalar implementation."] fn shrv_i16x16(self, a: i16x16, b: i16x16) -> i16x16; - #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each element is all ones if the corresponding elements are equal, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each logical lane is true if the corresponding elements are equal, and false if not."] fn simd_eq_i16x16(self, a: i16x16, b: i16x16) -> mask16x16; - #[doc = "Compare two vectors element-wise for less than.\n\nReturns a mask where each element is all ones if `a` is less than `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for less than.\n\nReturns a mask where each logical lane is true if `a` is less than `b`, and false if not."] fn simd_lt_i16x16(self, a: i16x16, b: i16x16) -> mask16x16; - #[doc = "Compare two vectors element-wise for less than or equal.\n\nReturns a mask where each element is all ones if `a` is less than or equal to `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for less than or equal.\n\nReturns a mask where each logical lane is true if `a` is less than or equal to `b`, and false if not."] fn simd_le_i16x16(self, a: i16x16, b: i16x16) -> mask16x16; - #[doc = "Compare two vectors element-wise for greater than or equal.\n\nReturns a mask where each element is all ones if `a` is greater than or equal to `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for greater than or equal.\n\nReturns a mask where each logical lane is true if `a` is greater than or equal to `b`, and false if not."] fn simd_ge_i16x16(self, a: i16x16, b: i16x16) -> mask16x16; - #[doc = "Compare two vectors element-wise for greater than.\n\nReturns a mask where each element is all ones if `a` is greater than `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for greater than.\n\nReturns a mask where each logical lane is true if `a` is greater than `b`, and false if not."] fn simd_gt_i16x16(self, a: i16x16, b: i16x16) -> mask16x16; #[doc = "Interleave the lower half elements of two vectors.\n\nFor vectors `[a0, a1, a2, a3]` and `[b0, b1, b2, b3]`, returns `[a0, b0, a1, b1]`.\n\n**Note:** This operation is only useful if you need to discard elements `a2, a3, b2, b3`.\n For fully interleaving two vectors prefer `interleave`,\n which is faster than `zip_low` followed by `zip_high` on some platforms."] fn zip_low_i16x16(self, a: i16x16, b: i16x16) -> i16x16; @@ -1453,7 +1331,7 @@ pub trait Simd: fn interleave_i16x16(self, a: i16x16, b: i16x16) -> (i16x16, i16x16); #[doc = "Deinterleave two vectors.\n\nThe first result contains all even-indexed elements from `a` followed by all even-indexed elements from `b`. The second result contains all odd-indexed elements from `a` followed by all odd-indexed elements from `b`.\n\nThe reverse of this operation is `interleave`.\n\nFor vectors `[a0, b0, a1, b1]` and `[a2, b2, a3, b3]`, returns `([a0, a1, a2, a3], [b0, b1, b2, b3])`."] fn deinterleave_i16x16(self, a: i16x16, b: i16x16) -> (i16x16, i16x16); - #[doc = "Select elements from b and c based on the mask operand a.\n\nThis operation's behavior is unspecified if each lane of a is not the all-zeroes or all-ones bit pattern. See the [`Select`] trait's documentation for more information."] + #[doc = "Select elements from b and c based on the mask operand a.\n\nThis operation's behavior is unspecified if a was constructed from signed integer lanes that are neither all-zeroes (integer value 0) nor all-ones (integer value -1). See the [`Select`] trait's documentation for more information."] fn select_i16x16(self, a: mask16x16, b: i16x16, c: i16x16) -> i16x16; #[doc = "Return the element-wise minimum of two vectors."] fn min_i16x16(self, a: i16x16, b: i16x16) -> i16x16; @@ -1517,15 +1395,15 @@ pub trait Simd: fn shr_u16x16(self, a: u16x16, shift: u32) -> u16x16; #[doc = "Shift each element right by the corresponding element in another vector.\n\nFor unsigned integers, zeros are shifted in on the left. For signed integers, the sign bit is replicated.\n\nThis operation is not implemented in hardware on all platforms. On WebAssembly, and on x86 platforms without AVX2, this will use a fallback scalar implementation."] fn shrv_u16x16(self, a: u16x16, b: u16x16) -> u16x16; - #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each element is all ones if the corresponding elements are equal, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each logical lane is true if the corresponding elements are equal, and false if not."] fn simd_eq_u16x16(self, a: u16x16, b: u16x16) -> mask16x16; - #[doc = "Compare two vectors element-wise for less than.\n\nReturns a mask where each element is all ones if `a` is less than `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for less than.\n\nReturns a mask where each logical lane is true if `a` is less than `b`, and false if not."] fn simd_lt_u16x16(self, a: u16x16, b: u16x16) -> mask16x16; - #[doc = "Compare two vectors element-wise for less than or equal.\n\nReturns a mask where each element is all ones if `a` is less than or equal to `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for less than or equal.\n\nReturns a mask where each logical lane is true if `a` is less than or equal to `b`, and false if not."] fn simd_le_u16x16(self, a: u16x16, b: u16x16) -> mask16x16; - #[doc = "Compare two vectors element-wise for greater than or equal.\n\nReturns a mask where each element is all ones if `a` is greater than or equal to `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for greater than or equal.\n\nReturns a mask where each logical lane is true if `a` is greater than or equal to `b`, and false if not."] fn simd_ge_u16x16(self, a: u16x16, b: u16x16) -> mask16x16; - #[doc = "Compare two vectors element-wise for greater than.\n\nReturns a mask where each element is all ones if `a` is greater than `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for greater than.\n\nReturns a mask where each logical lane is true if `a` is greater than `b`, and false if not."] fn simd_gt_u16x16(self, a: u16x16, b: u16x16) -> mask16x16; #[doc = "Interleave the lower half elements of two vectors.\n\nFor vectors `[a0, a1, a2, a3]` and `[b0, b1, b2, b3]`, returns `[a0, b0, a1, b1]`.\n\n**Note:** This operation is only useful if you need to discard elements `a2, a3, b2, b3`.\n For fully interleaving two vectors prefer `interleave`,\n which is faster than `zip_low` followed by `zip_high` on some platforms."] fn zip_low_u16x16(self, a: u16x16, b: u16x16) -> u16x16; @@ -1539,7 +1417,7 @@ pub trait Simd: fn interleave_u16x16(self, a: u16x16, b: u16x16) -> (u16x16, u16x16); #[doc = "Deinterleave two vectors.\n\nThe first result contains all even-indexed elements from `a` followed by all even-indexed elements from `b`. The second result contains all odd-indexed elements from `a` followed by all odd-indexed elements from `b`.\n\nThe reverse of this operation is `interleave`.\n\nFor vectors `[a0, b0, a1, b1]` and `[a2, b2, a3, b3]`, returns `([a0, a1, a2, a3], [b0, b1, b2, b3])`."] fn deinterleave_u16x16(self, a: u16x16, b: u16x16) -> (u16x16, u16x16); - #[doc = "Select elements from b and c based on the mask operand a.\n\nThis operation's behavior is unspecified if each lane of a is not the all-zeroes or all-ones bit pattern. See the [`Select`] trait's documentation for more information."] + #[doc = "Select elements from b and c based on the mask operand a.\n\nThis operation's behavior is unspecified if a was constructed from signed integer lanes that are neither all-zeroes (integer value 0) nor all-ones (integer value -1). See the [`Select`] trait's documentation for more information."] fn select_u16x16(self, a: mask16x16, b: u16x16, c: u16x16) -> u16x16; #[doc = "Return the element-wise minimum of two vectors."] fn min_u16x16(self, a: u16x16, b: u16x16) -> u16x16; @@ -1555,36 +1433,12 @@ pub trait Simd: fn reinterpret_u8_u16x16(self, a: u16x16) -> u8x32; #[doc = "Reinterpret the bits of this vector as a vector of `u32` elements.\n\nThe total bit width is preserved; the number of elements changes accordingly."] fn reinterpret_u32_u16x16(self, a: u16x16) -> u32x8; - #[doc = "Create a SIMD vector with all elements set to the given value."] - fn splat_mask16x16(self, val: i16) -> mask16x16; - #[doc = "Create a SIMD vector from an array of the same length."] + #[doc = "Create a SIMD mask with all lanes set from the given boolean value."] + fn splat_mask16x16(self, val: bool) -> mask16x16; + #[doc = "Create a SIMD mask from signed integer mask lanes."] fn load_array_mask16x16(self, val: [i16; 16usize]) -> mask16x16; - #[doc = "Create a SIMD vector from an array of the same length."] - fn load_array_ref_mask16x16(self, val: &[i16; 16usize]) -> mask16x16; - #[doc = "Convert a SIMD vector to an array."] + #[doc = "Convert a SIMD mask to signed integer mask lanes."] fn as_array_mask16x16(self, a: mask16x16) -> [i16; 16usize]; - #[doc = "Project a reference to a SIMD vector to a reference to the equivalent array."] - fn as_array_ref_mask16x16(self, a: &mask16x16) -> &[i16; 16usize]; - #[doc = "Project a mutable reference to a SIMD vector to a mutable reference to the equivalent array."] - fn as_array_mut_mask16x16(self, a: &mut mask16x16) -> &mut [i16; 16usize]; - #[doc = "Store a SIMD vector into an array of the same length."] - fn store_array_mask16x16(self, a: mask16x16, dest: &mut [i16; 16usize]) -> (); - #[doc = "Reinterpret a vector of bytes as a SIMD vector of a given type, with the equivalent byte length."] - fn cvt_from_bytes_mask16x16(self, a: u8x32) -> mask16x16; - #[doc = "Reinterpret a SIMD vector as a vector of bytes, with the equivalent byte length."] - fn cvt_to_bytes_mask16x16(self, a: mask16x16) -> u8x32; - #[doc = "Concatenate `[self, rhs]` and extract `Self::N` elements starting at index `SHIFT`.\n\n`SHIFT` must be within [0, `Self::N`].\n\nThis can be used to implement a \"shift items\" operation by providing all zeroes as one operand. For a left shift, the right-hand side should be all zeroes. For a right shift by `M` items, the left-hand side should be all zeroes, and the shift amount will be `Self::N - M`.\n\nThis can also be used to rotate items within a vector by providing the same vector as both operands.\n\n```text\n\nslide::<1>([a b c d], [e f g h]) == [b c d e]\n\n```"] - fn slide_mask16x16( - self, - a: mask16x16, - b: mask16x16, - ) -> mask16x16; - #[doc = "Like `slide`, but operates independently on each 128-bit block."] - fn slide_within_blocks_mask16x16( - self, - a: mask16x16, - b: mask16x16, - ) -> mask16x16; #[doc = "Compute the logical AND of two masks."] fn and_mask16x16(self, a: mask16x16, b: mask16x16) -> mask16x16; #[doc = "Compute the logical OR of two masks."] @@ -1593,22 +1447,22 @@ pub trait Simd: fn xor_mask16x16(self, a: mask16x16, b: mask16x16) -> mask16x16; #[doc = "Compute the logical NOT of the mask."] fn not_mask16x16(self, a: mask16x16) -> mask16x16; - #[doc = "Select elements from `b` and `c` based on the mask operand `a`.\n\nThis operation's behavior is unspecified if each lane of a is not the all-zeroes or all-ones bit pattern. See the [`Select`] trait's documentation for more information."] + #[doc = "Select elements from `b` and `c` based on the mask operand `a`.\n\nThis operation's behavior is unspecified if a was constructed from signed integer lanes that are neither all-zeroes (integer value 0) nor all-ones (integer value -1). See the [`Select`] trait's documentation for more information."] fn select_mask16x16( self, a: mask16x16, b: mask16x16, c: mask16x16, ) -> mask16x16; - #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each element is all ones if the corresponding elements are equal, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each logical lane is true if the corresponding elements are equal, and false if not."] fn simd_eq_mask16x16(self, a: mask16x16, b: mask16x16) -> mask16x16; - #[doc = "Returns true if any elements in this mask are true (all ones).\n\nBehavior on mask elements that are not all zeroes or all ones is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent if mask elements are not all zeroes or all ones. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for mask elements that are not all zeroes or all ones. That behavior may not match the behavior of this operation."] + #[doc = "Returns true if any logical lanes in this mask are true.\n\nMasks may be converted to and from signed integer lane arrays for compatibility with older APIs. For those conversions, false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1).\n\nBehavior on masks constructed from any other integer bit pattern is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent for such non-canonical masks. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for non-canonical masks. That behavior may not match the behavior of this operation."] fn any_true_mask16x16(self, a: mask16x16) -> bool; - #[doc = "Returns true if all elements in this mask are true (all ones).\n\nBehavior on mask elements that are not all zeroes or all ones is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent if mask elements are not all zeroes or all ones. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for mask elements that are not all zeroes or all ones. That behavior may not match the behavior of this operation."] + #[doc = "Returns true if all logical lanes in this mask are true.\n\nMasks may be converted to and from signed integer lane arrays for compatibility with older APIs. For those conversions, false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1).\n\nBehavior on masks constructed from any other integer bit pattern is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent for such non-canonical masks. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for non-canonical masks. That behavior may not match the behavior of this operation."] fn all_true_mask16x16(self, a: mask16x16) -> bool; - #[doc = "Returns true if any elements in this mask are false (all zeroes).\n\nThis is logically equivalent to `!all_true`, but may be faster.\n\nBehavior on mask elements that are not all zeroes or all ones is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent if mask elements are not all zeroes or all ones. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for mask elements that are not all zeroes or all ones. That behavior may not match the behavior of this operation."] + #[doc = "Returns true if any logical lanes in this mask are false.\n\nThis is logically equivalent to `!all_true`, but may be faster.\n\nMasks may be converted to and from signed integer lane arrays for compatibility with older APIs. For those conversions, false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1).\n\nBehavior on masks constructed from any other integer bit pattern is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent for such non-canonical masks. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for non-canonical masks. That behavior may not match the behavior of this operation."] fn any_false_mask16x16(self, a: mask16x16) -> bool; - #[doc = "Returns true if all elements in this mask are false (all zeroes).\n\nThis is logically equivalent to `!any_true`, but may be faster.\n\nBehavior on mask elements that are not all zeroes or all ones is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent if mask elements are not all zeroes or all ones. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for mask elements that are not all zeroes or all ones. That behavior may not match the behavior of this operation."] + #[doc = "Returns true if all logical lanes in this mask are false.\n\nThis is logically equivalent to `!any_true`, but may be faster.\n\nMasks may be converted to and from signed integer lane arrays for compatibility with older APIs. For those conversions, false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1).\n\nBehavior on masks constructed from any other integer bit pattern is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent for such non-canonical masks. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for non-canonical masks. That behavior may not match the behavior of this operation."] fn all_false_mask16x16(self, a: mask16x16) -> bool; #[doc = "Combine two vectors into a single vector with twice the width.\n\n`a` provides the lower elements and `b` provides the upper elements."] fn combine_mask16x16(self, a: mask16x16, b: mask16x16) -> mask16x32; @@ -1662,15 +1516,15 @@ pub trait Simd: fn shr_i32x8(self, a: i32x8, shift: u32) -> i32x8; #[doc = "Shift each element right by the corresponding element in another vector.\n\nFor unsigned integers, zeros are shifted in on the left. For signed integers, the sign bit is replicated.\n\nThis operation is not implemented in hardware on all platforms. On WebAssembly, and on x86 platforms without AVX2, this will use a fallback scalar implementation."] fn shrv_i32x8(self, a: i32x8, b: i32x8) -> i32x8; - #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each element is all ones if the corresponding elements are equal, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each logical lane is true if the corresponding elements are equal, and false if not."] fn simd_eq_i32x8(self, a: i32x8, b: i32x8) -> mask32x8; - #[doc = "Compare two vectors element-wise for less than.\n\nReturns a mask where each element is all ones if `a` is less than `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for less than.\n\nReturns a mask where each logical lane is true if `a` is less than `b`, and false if not."] fn simd_lt_i32x8(self, a: i32x8, b: i32x8) -> mask32x8; - #[doc = "Compare two vectors element-wise for less than or equal.\n\nReturns a mask where each element is all ones if `a` is less than or equal to `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for less than or equal.\n\nReturns a mask where each logical lane is true if `a` is less than or equal to `b`, and false if not."] fn simd_le_i32x8(self, a: i32x8, b: i32x8) -> mask32x8; - #[doc = "Compare two vectors element-wise for greater than or equal.\n\nReturns a mask where each element is all ones if `a` is greater than or equal to `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for greater than or equal.\n\nReturns a mask where each logical lane is true if `a` is greater than or equal to `b`, and false if not."] fn simd_ge_i32x8(self, a: i32x8, b: i32x8) -> mask32x8; - #[doc = "Compare two vectors element-wise for greater than.\n\nReturns a mask where each element is all ones if `a` is greater than `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for greater than.\n\nReturns a mask where each logical lane is true if `a` is greater than `b`, and false if not."] fn simd_gt_i32x8(self, a: i32x8, b: i32x8) -> mask32x8; #[doc = "Interleave the lower half elements of two vectors.\n\nFor vectors `[a0, a1, a2, a3]` and `[b0, b1, b2, b3]`, returns `[a0, b0, a1, b1]`.\n\n**Note:** This operation is only useful if you need to discard elements `a2, a3, b2, b3`.\n For fully interleaving two vectors prefer `interleave`,\n which is faster than `zip_low` followed by `zip_high` on some platforms."] fn zip_low_i32x8(self, a: i32x8, b: i32x8) -> i32x8; @@ -1684,7 +1538,7 @@ pub trait Simd: fn interleave_i32x8(self, a: i32x8, b: i32x8) -> (i32x8, i32x8); #[doc = "Deinterleave two vectors.\n\nThe first result contains all even-indexed elements from `a` followed by all even-indexed elements from `b`. The second result contains all odd-indexed elements from `a` followed by all odd-indexed elements from `b`.\n\nThe reverse of this operation is `interleave`.\n\nFor vectors `[a0, b0, a1, b1]` and `[a2, b2, a3, b3]`, returns `([a0, a1, a2, a3], [b0, b1, b2, b3])`."] fn deinterleave_i32x8(self, a: i32x8, b: i32x8) -> (i32x8, i32x8); - #[doc = "Select elements from b and c based on the mask operand a.\n\nThis operation's behavior is unspecified if each lane of a is not the all-zeroes or all-ones bit pattern. See the [`Select`] trait's documentation for more information."] + #[doc = "Select elements from b and c based on the mask operand a.\n\nThis operation's behavior is unspecified if a was constructed from signed integer lanes that are neither all-zeroes (integer value 0) nor all-ones (integer value -1). See the [`Select`] trait's documentation for more information."] fn select_i32x8(self, a: mask32x8, b: i32x8, c: i32x8) -> i32x8; #[doc = "Return the element-wise minimum of two vectors."] fn min_i32x8(self, a: i32x8, b: i32x8) -> i32x8; @@ -1750,15 +1604,15 @@ pub trait Simd: fn shr_u32x8(self, a: u32x8, shift: u32) -> u32x8; #[doc = "Shift each element right by the corresponding element in another vector.\n\nFor unsigned integers, zeros are shifted in on the left. For signed integers, the sign bit is replicated.\n\nThis operation is not implemented in hardware on all platforms. On WebAssembly, and on x86 platforms without AVX2, this will use a fallback scalar implementation."] fn shrv_u32x8(self, a: u32x8, b: u32x8) -> u32x8; - #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each element is all ones if the corresponding elements are equal, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each logical lane is true if the corresponding elements are equal, and false if not."] fn simd_eq_u32x8(self, a: u32x8, b: u32x8) -> mask32x8; - #[doc = "Compare two vectors element-wise for less than.\n\nReturns a mask where each element is all ones if `a` is less than `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for less than.\n\nReturns a mask where each logical lane is true if `a` is less than `b`, and false if not."] fn simd_lt_u32x8(self, a: u32x8, b: u32x8) -> mask32x8; - #[doc = "Compare two vectors element-wise for less than or equal.\n\nReturns a mask where each element is all ones if `a` is less than or equal to `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for less than or equal.\n\nReturns a mask where each logical lane is true if `a` is less than or equal to `b`, and false if not."] fn simd_le_u32x8(self, a: u32x8, b: u32x8) -> mask32x8; - #[doc = "Compare two vectors element-wise for greater than or equal.\n\nReturns a mask where each element is all ones if `a` is greater than or equal to `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for greater than or equal.\n\nReturns a mask where each logical lane is true if `a` is greater than or equal to `b`, and false if not."] fn simd_ge_u32x8(self, a: u32x8, b: u32x8) -> mask32x8; - #[doc = "Compare two vectors element-wise for greater than.\n\nReturns a mask where each element is all ones if `a` is greater than `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for greater than.\n\nReturns a mask where each logical lane is true if `a` is greater than `b`, and false if not."] fn simd_gt_u32x8(self, a: u32x8, b: u32x8) -> mask32x8; #[doc = "Interleave the lower half elements of two vectors.\n\nFor vectors `[a0, a1, a2, a3]` and `[b0, b1, b2, b3]`, returns `[a0, b0, a1, b1]`.\n\n**Note:** This operation is only useful if you need to discard elements `a2, a3, b2, b3`.\n For fully interleaving two vectors prefer `interleave`,\n which is faster than `zip_low` followed by `zip_high` on some platforms."] fn zip_low_u32x8(self, a: u32x8, b: u32x8) -> u32x8; @@ -1772,7 +1626,7 @@ pub trait Simd: fn interleave_u32x8(self, a: u32x8, b: u32x8) -> (u32x8, u32x8); #[doc = "Deinterleave two vectors.\n\nThe first result contains all even-indexed elements from `a` followed by all even-indexed elements from `b`. The second result contains all odd-indexed elements from `a` followed by all odd-indexed elements from `b`.\n\nThe reverse of this operation is `interleave`.\n\nFor vectors `[a0, b0, a1, b1]` and `[a2, b2, a3, b3]`, returns `([a0, a1, a2, a3], [b0, b1, b2, b3])`."] fn deinterleave_u32x8(self, a: u32x8, b: u32x8) -> (u32x8, u32x8); - #[doc = "Select elements from b and c based on the mask operand a.\n\nThis operation's behavior is unspecified if each lane of a is not the all-zeroes or all-ones bit pattern. See the [`Select`] trait's documentation for more information."] + #[doc = "Select elements from b and c based on the mask operand a.\n\nThis operation's behavior is unspecified if a was constructed from signed integer lanes that are neither all-zeroes (integer value 0) nor all-ones (integer value -1). See the [`Select`] trait's documentation for more information."] fn select_u32x8(self, a: mask32x8, b: u32x8, c: u32x8) -> u32x8; #[doc = "Return the element-wise minimum of two vectors."] fn min_u32x8(self, a: u32x8, b: u32x8) -> u32x8; @@ -1786,36 +1640,12 @@ pub trait Simd: fn reinterpret_u8_u32x8(self, a: u32x8) -> u8x32; #[doc = "Convert each unsigned 32-bit integer element to a floating-point value.\n\nValues that cannot be exactly represented are rounded to the nearest representable value."] fn cvt_f32_u32x8(self, a: u32x8) -> f32x8; - #[doc = "Create a SIMD vector with all elements set to the given value."] - fn splat_mask32x8(self, val: i32) -> mask32x8; - #[doc = "Create a SIMD vector from an array of the same length."] + #[doc = "Create a SIMD mask with all lanes set from the given boolean value."] + fn splat_mask32x8(self, val: bool) -> mask32x8; + #[doc = "Create a SIMD mask from signed integer mask lanes."] fn load_array_mask32x8(self, val: [i32; 8usize]) -> mask32x8; - #[doc = "Create a SIMD vector from an array of the same length."] - fn load_array_ref_mask32x8(self, val: &[i32; 8usize]) -> mask32x8; - #[doc = "Convert a SIMD vector to an array."] + #[doc = "Convert a SIMD mask to signed integer mask lanes."] fn as_array_mask32x8(self, a: mask32x8) -> [i32; 8usize]; - #[doc = "Project a reference to a SIMD vector to a reference to the equivalent array."] - fn as_array_ref_mask32x8(self, a: &mask32x8) -> &[i32; 8usize]; - #[doc = "Project a mutable reference to a SIMD vector to a mutable reference to the equivalent array."] - fn as_array_mut_mask32x8(self, a: &mut mask32x8) -> &mut [i32; 8usize]; - #[doc = "Store a SIMD vector into an array of the same length."] - fn store_array_mask32x8(self, a: mask32x8, dest: &mut [i32; 8usize]) -> (); - #[doc = "Reinterpret a vector of bytes as a SIMD vector of a given type, with the equivalent byte length."] - fn cvt_from_bytes_mask32x8(self, a: u8x32) -> mask32x8; - #[doc = "Reinterpret a SIMD vector as a vector of bytes, with the equivalent byte length."] - fn cvt_to_bytes_mask32x8(self, a: mask32x8) -> u8x32; - #[doc = "Concatenate `[self, rhs]` and extract `Self::N` elements starting at index `SHIFT`.\n\n`SHIFT` must be within [0, `Self::N`].\n\nThis can be used to implement a \"shift items\" operation by providing all zeroes as one operand. For a left shift, the right-hand side should be all zeroes. For a right shift by `M` items, the left-hand side should be all zeroes, and the shift amount will be `Self::N - M`.\n\nThis can also be used to rotate items within a vector by providing the same vector as both operands.\n\n```text\n\nslide::<1>([a b c d], [e f g h]) == [b c d e]\n\n```"] - fn slide_mask32x8( - self, - a: mask32x8, - b: mask32x8, - ) -> mask32x8; - #[doc = "Like `slide`, but operates independently on each 128-bit block."] - fn slide_within_blocks_mask32x8( - self, - a: mask32x8, - b: mask32x8, - ) -> mask32x8; #[doc = "Compute the logical AND of two masks."] fn and_mask32x8(self, a: mask32x8, b: mask32x8) -> mask32x8; #[doc = "Compute the logical OR of two masks."] @@ -1824,22 +1654,22 @@ pub trait Simd: fn xor_mask32x8(self, a: mask32x8, b: mask32x8) -> mask32x8; #[doc = "Compute the logical NOT of the mask."] fn not_mask32x8(self, a: mask32x8) -> mask32x8; - #[doc = "Select elements from `b` and `c` based on the mask operand `a`.\n\nThis operation's behavior is unspecified if each lane of a is not the all-zeroes or all-ones bit pattern. See the [`Select`] trait's documentation for more information."] + #[doc = "Select elements from `b` and `c` based on the mask operand `a`.\n\nThis operation's behavior is unspecified if a was constructed from signed integer lanes that are neither all-zeroes (integer value 0) nor all-ones (integer value -1). See the [`Select`] trait's documentation for more information."] fn select_mask32x8( self, a: mask32x8, b: mask32x8, c: mask32x8, ) -> mask32x8; - #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each element is all ones if the corresponding elements are equal, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each logical lane is true if the corresponding elements are equal, and false if not."] fn simd_eq_mask32x8(self, a: mask32x8, b: mask32x8) -> mask32x8; - #[doc = "Returns true if any elements in this mask are true (all ones).\n\nBehavior on mask elements that are not all zeroes or all ones is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent if mask elements are not all zeroes or all ones. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for mask elements that are not all zeroes or all ones. That behavior may not match the behavior of this operation."] + #[doc = "Returns true if any logical lanes in this mask are true.\n\nMasks may be converted to and from signed integer lane arrays for compatibility with older APIs. For those conversions, false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1).\n\nBehavior on masks constructed from any other integer bit pattern is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent for such non-canonical masks. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for non-canonical masks. That behavior may not match the behavior of this operation."] fn any_true_mask32x8(self, a: mask32x8) -> bool; - #[doc = "Returns true if all elements in this mask are true (all ones).\n\nBehavior on mask elements that are not all zeroes or all ones is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent if mask elements are not all zeroes or all ones. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for mask elements that are not all zeroes or all ones. That behavior may not match the behavior of this operation."] + #[doc = "Returns true if all logical lanes in this mask are true.\n\nMasks may be converted to and from signed integer lane arrays for compatibility with older APIs. For those conversions, false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1).\n\nBehavior on masks constructed from any other integer bit pattern is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent for such non-canonical masks. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for non-canonical masks. That behavior may not match the behavior of this operation."] fn all_true_mask32x8(self, a: mask32x8) -> bool; - #[doc = "Returns true if any elements in this mask are false (all zeroes).\n\nThis is logically equivalent to `!all_true`, but may be faster.\n\nBehavior on mask elements that are not all zeroes or all ones is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent if mask elements are not all zeroes or all ones. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for mask elements that are not all zeroes or all ones. That behavior may not match the behavior of this operation."] + #[doc = "Returns true if any logical lanes in this mask are false.\n\nThis is logically equivalent to `!all_true`, but may be faster.\n\nMasks may be converted to and from signed integer lane arrays for compatibility with older APIs. For those conversions, false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1).\n\nBehavior on masks constructed from any other integer bit pattern is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent for such non-canonical masks. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for non-canonical masks. That behavior may not match the behavior of this operation."] fn any_false_mask32x8(self, a: mask32x8) -> bool; - #[doc = "Returns true if all elements in this mask are false (all zeroes).\n\nThis is logically equivalent to `!any_true`, but may be faster.\n\nBehavior on mask elements that are not all zeroes or all ones is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent if mask elements are not all zeroes or all ones. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for mask elements that are not all zeroes or all ones. That behavior may not match the behavior of this operation."] + #[doc = "Returns true if all logical lanes in this mask are false.\n\nThis is logically equivalent to `!any_true`, but may be faster.\n\nMasks may be converted to and from signed integer lane arrays for compatibility with older APIs. For those conversions, false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1).\n\nBehavior on masks constructed from any other integer bit pattern is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent for such non-canonical masks. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for non-canonical masks. That behavior may not match the behavior of this operation."] fn all_false_mask32x8(self, a: mask32x8) -> bool; #[doc = "Combine two vectors into a single vector with twice the width.\n\n`a` provides the lower elements and `b` provides the upper elements."] fn combine_mask32x8(self, a: mask32x8, b: mask32x8) -> mask32x16; @@ -1887,15 +1717,15 @@ pub trait Simd: fn div_f64x4(self, a: f64x4, b: f64x4) -> f64x4; #[doc = "Return a vector with the magnitude of `a` and the sign of `b` for each element.\n\nThis operation copies the sign bit, so if an input element is NaN, the output element will be a NaN with the same payload and a copied sign bit."] fn copysign_f64x4(self, a: f64x4, b: f64x4) -> f64x4; - #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each element is all ones if the corresponding elements are equal, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each logical lane is true if the corresponding elements are equal, and false if not."] fn simd_eq_f64x4(self, a: f64x4, b: f64x4) -> mask64x4; - #[doc = "Compare two vectors element-wise for less than.\n\nReturns a mask where each element is all ones if `a` is less than `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for less than.\n\nReturns a mask where each logical lane is true if `a` is less than `b`, and false if not."] fn simd_lt_f64x4(self, a: f64x4, b: f64x4) -> mask64x4; - #[doc = "Compare two vectors element-wise for less than or equal.\n\nReturns a mask where each element is all ones if `a` is less than or equal to `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for less than or equal.\n\nReturns a mask where each logical lane is true if `a` is less than or equal to `b`, and false if not."] fn simd_le_f64x4(self, a: f64x4, b: f64x4) -> mask64x4; - #[doc = "Compare two vectors element-wise for greater than or equal.\n\nReturns a mask where each element is all ones if `a` is greater than or equal to `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for greater than or equal.\n\nReturns a mask where each logical lane is true if `a` is greater than or equal to `b`, and false if not."] fn simd_ge_f64x4(self, a: f64x4, b: f64x4) -> mask64x4; - #[doc = "Compare two vectors element-wise for greater than.\n\nReturns a mask where each element is all ones if `a` is greater than `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for greater than.\n\nReturns a mask where each logical lane is true if `a` is greater than `b`, and false if not."] fn simd_gt_f64x4(self, a: f64x4, b: f64x4) -> mask64x4; #[doc = "Interleave the lower half elements of two vectors.\n\nFor vectors `[a0, a1, a2, a3]` and `[b0, b1, b2, b3]`, returns `[a0, b0, a1, b1]`.\n\n**Note:** This operation is only useful if you need to discard elements `a2, a3, b2, b3`.\n For fully interleaving two vectors prefer `interleave`,\n which is faster than `zip_low` followed by `zip_high` on some platforms."] fn zip_low_f64x4(self, a: f64x4, b: f64x4) -> f64x4; @@ -1931,7 +1761,7 @@ pub trait Simd: fn fract_f64x4(self, a: f64x4) -> f64x4; #[doc = "Return the integer part of each element, rounding towards zero."] fn trunc_f64x4(self, a: f64x4) -> f64x4; - #[doc = "Select elements from b and c based on the mask operand a.\n\nThis operation's behavior is unspecified if each lane of a is not the all-zeroes or all-ones bit pattern. See the [`Select`] trait's documentation for more information."] + #[doc = "Select elements from b and c based on the mask operand a.\n\nThis operation's behavior is unspecified if a was constructed from signed integer lanes that are neither all-zeroes (integer value 0) nor all-ones (integer value -1). See the [`Select`] trait's documentation for more information."] fn select_f64x4(self, a: mask64x4, b: f64x4, c: f64x4) -> f64x4; #[doc = "Combine two vectors into a single vector with twice the width.\n\n`a` provides the lower elements and `b` provides the upper elements."] fn combine_f64x4(self, a: f64x4, b: f64x4) -> f64x8; @@ -1939,36 +1769,12 @@ pub trait Simd: fn split_f64x4(self, a: f64x4) -> (f64x2, f64x2); #[doc = "Reinterpret the bits of this vector as a vector of `f32` elements.\n\nThe number of elements in the result is twice that of the input."] fn reinterpret_f32_f64x4(self, a: f64x4) -> f32x8; - #[doc = "Create a SIMD vector with all elements set to the given value."] - fn splat_mask64x4(self, val: i64) -> mask64x4; - #[doc = "Create a SIMD vector from an array of the same length."] + #[doc = "Create a SIMD mask with all lanes set from the given boolean value."] + fn splat_mask64x4(self, val: bool) -> mask64x4; + #[doc = "Create a SIMD mask from signed integer mask lanes."] fn load_array_mask64x4(self, val: [i64; 4usize]) -> mask64x4; - #[doc = "Create a SIMD vector from an array of the same length."] - fn load_array_ref_mask64x4(self, val: &[i64; 4usize]) -> mask64x4; - #[doc = "Convert a SIMD vector to an array."] + #[doc = "Convert a SIMD mask to signed integer mask lanes."] fn as_array_mask64x4(self, a: mask64x4) -> [i64; 4usize]; - #[doc = "Project a reference to a SIMD vector to a reference to the equivalent array."] - fn as_array_ref_mask64x4(self, a: &mask64x4) -> &[i64; 4usize]; - #[doc = "Project a mutable reference to a SIMD vector to a mutable reference to the equivalent array."] - fn as_array_mut_mask64x4(self, a: &mut mask64x4) -> &mut [i64; 4usize]; - #[doc = "Store a SIMD vector into an array of the same length."] - fn store_array_mask64x4(self, a: mask64x4, dest: &mut [i64; 4usize]) -> (); - #[doc = "Reinterpret a vector of bytes as a SIMD vector of a given type, with the equivalent byte length."] - fn cvt_from_bytes_mask64x4(self, a: u8x32) -> mask64x4; - #[doc = "Reinterpret a SIMD vector as a vector of bytes, with the equivalent byte length."] - fn cvt_to_bytes_mask64x4(self, a: mask64x4) -> u8x32; - #[doc = "Concatenate `[self, rhs]` and extract `Self::N` elements starting at index `SHIFT`.\n\n`SHIFT` must be within [0, `Self::N`].\n\nThis can be used to implement a \"shift items\" operation by providing all zeroes as one operand. For a left shift, the right-hand side should be all zeroes. For a right shift by `M` items, the left-hand side should be all zeroes, and the shift amount will be `Self::N - M`.\n\nThis can also be used to rotate items within a vector by providing the same vector as both operands.\n\n```text\n\nslide::<1>([a b c d], [e f g h]) == [b c d e]\n\n```"] - fn slide_mask64x4( - self, - a: mask64x4, - b: mask64x4, - ) -> mask64x4; - #[doc = "Like `slide`, but operates independently on each 128-bit block."] - fn slide_within_blocks_mask64x4( - self, - a: mask64x4, - b: mask64x4, - ) -> mask64x4; #[doc = "Compute the logical AND of two masks."] fn and_mask64x4(self, a: mask64x4, b: mask64x4) -> mask64x4; #[doc = "Compute the logical OR of two masks."] @@ -1977,22 +1783,22 @@ pub trait Simd: fn xor_mask64x4(self, a: mask64x4, b: mask64x4) -> mask64x4; #[doc = "Compute the logical NOT of the mask."] fn not_mask64x4(self, a: mask64x4) -> mask64x4; - #[doc = "Select elements from `b` and `c` based on the mask operand `a`.\n\nThis operation's behavior is unspecified if each lane of a is not the all-zeroes or all-ones bit pattern. See the [`Select`] trait's documentation for more information."] + #[doc = "Select elements from `b` and `c` based on the mask operand `a`.\n\nThis operation's behavior is unspecified if a was constructed from signed integer lanes that are neither all-zeroes (integer value 0) nor all-ones (integer value -1). See the [`Select`] trait's documentation for more information."] fn select_mask64x4( self, a: mask64x4, b: mask64x4, c: mask64x4, ) -> mask64x4; - #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each element is all ones if the corresponding elements are equal, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each logical lane is true if the corresponding elements are equal, and false if not."] fn simd_eq_mask64x4(self, a: mask64x4, b: mask64x4) -> mask64x4; - #[doc = "Returns true if any elements in this mask are true (all ones).\n\nBehavior on mask elements that are not all zeroes or all ones is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent if mask elements are not all zeroes or all ones. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for mask elements that are not all zeroes or all ones. That behavior may not match the behavior of this operation."] + #[doc = "Returns true if any logical lanes in this mask are true.\n\nMasks may be converted to and from signed integer lane arrays for compatibility with older APIs. For those conversions, false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1).\n\nBehavior on masks constructed from any other integer bit pattern is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent for such non-canonical masks. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for non-canonical masks. That behavior may not match the behavior of this operation."] fn any_true_mask64x4(self, a: mask64x4) -> bool; - #[doc = "Returns true if all elements in this mask are true (all ones).\n\nBehavior on mask elements that are not all zeroes or all ones is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent if mask elements are not all zeroes or all ones. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for mask elements that are not all zeroes or all ones. That behavior may not match the behavior of this operation."] + #[doc = "Returns true if all logical lanes in this mask are true.\n\nMasks may be converted to and from signed integer lane arrays for compatibility with older APIs. For those conversions, false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1).\n\nBehavior on masks constructed from any other integer bit pattern is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent for such non-canonical masks. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for non-canonical masks. That behavior may not match the behavior of this operation."] fn all_true_mask64x4(self, a: mask64x4) -> bool; - #[doc = "Returns true if any elements in this mask are false (all zeroes).\n\nThis is logically equivalent to `!all_true`, but may be faster.\n\nBehavior on mask elements that are not all zeroes or all ones is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent if mask elements are not all zeroes or all ones. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for mask elements that are not all zeroes or all ones. That behavior may not match the behavior of this operation."] + #[doc = "Returns true if any logical lanes in this mask are false.\n\nThis is logically equivalent to `!all_true`, but may be faster.\n\nMasks may be converted to and from signed integer lane arrays for compatibility with older APIs. For those conversions, false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1).\n\nBehavior on masks constructed from any other integer bit pattern is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent for such non-canonical masks. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for non-canonical masks. That behavior may not match the behavior of this operation."] fn any_false_mask64x4(self, a: mask64x4) -> bool; - #[doc = "Returns true if all elements in this mask are false (all zeroes).\n\nThis is logically equivalent to `!any_true`, but may be faster.\n\nBehavior on mask elements that are not all zeroes or all ones is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent if mask elements are not all zeroes or all ones. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for mask elements that are not all zeroes or all ones. That behavior may not match the behavior of this operation."] + #[doc = "Returns true if all logical lanes in this mask are false.\n\nThis is logically equivalent to `!any_true`, but may be faster.\n\nMasks may be converted to and from signed integer lane arrays for compatibility with older APIs. For those conversions, false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1).\n\nBehavior on masks constructed from any other integer bit pattern is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent for such non-canonical masks. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for non-canonical masks. That behavior may not match the behavior of this operation."] fn all_false_mask64x4(self, a: mask64x4) -> bool; #[doc = "Combine two vectors into a single vector with twice the width.\n\n`a` provides the lower elements and `b` provides the upper elements."] fn combine_mask64x4(self, a: mask64x4, b: mask64x4) -> mask64x8; @@ -2040,15 +1846,15 @@ pub trait Simd: fn div_f32x16(self, a: f32x16, b: f32x16) -> f32x16; #[doc = "Return a vector with the magnitude of `a` and the sign of `b` for each element.\n\nThis operation copies the sign bit, so if an input element is NaN, the output element will be a NaN with the same payload and a copied sign bit."] fn copysign_f32x16(self, a: f32x16, b: f32x16) -> f32x16; - #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each element is all ones if the corresponding elements are equal, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each logical lane is true if the corresponding elements are equal, and false if not."] fn simd_eq_f32x16(self, a: f32x16, b: f32x16) -> mask32x16; - #[doc = "Compare two vectors element-wise for less than.\n\nReturns a mask where each element is all ones if `a` is less than `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for less than.\n\nReturns a mask where each logical lane is true if `a` is less than `b`, and false if not."] fn simd_lt_f32x16(self, a: f32x16, b: f32x16) -> mask32x16; - #[doc = "Compare two vectors element-wise for less than or equal.\n\nReturns a mask where each element is all ones if `a` is less than or equal to `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for less than or equal.\n\nReturns a mask where each logical lane is true if `a` is less than or equal to `b`, and false if not."] fn simd_le_f32x16(self, a: f32x16, b: f32x16) -> mask32x16; - #[doc = "Compare two vectors element-wise for greater than or equal.\n\nReturns a mask where each element is all ones if `a` is greater than or equal to `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for greater than or equal.\n\nReturns a mask where each logical lane is true if `a` is greater than or equal to `b`, and false if not."] fn simd_ge_f32x16(self, a: f32x16, b: f32x16) -> mask32x16; - #[doc = "Compare two vectors element-wise for greater than.\n\nReturns a mask where each element is all ones if `a` is greater than `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for greater than.\n\nReturns a mask where each logical lane is true if `a` is greater than `b`, and false if not."] fn simd_gt_f32x16(self, a: f32x16, b: f32x16) -> mask32x16; #[doc = "Interleave the lower half elements of two vectors.\n\nFor vectors `[a0, a1, a2, a3]` and `[b0, b1, b2, b3]`, returns `[a0, b0, a1, b1]`.\n\n**Note:** This operation is only useful if you need to discard elements `a2, a3, b2, b3`.\n For fully interleaving two vectors prefer `interleave`,\n which is faster than `zip_low` followed by `zip_high` on some platforms."] fn zip_low_f32x16(self, a: f32x16, b: f32x16) -> f32x16; @@ -2084,7 +1890,7 @@ pub trait Simd: fn fract_f32x16(self, a: f32x16) -> f32x16; #[doc = "Return the integer part of each element, rounding towards zero."] fn trunc_f32x16(self, a: f32x16) -> f32x16; - #[doc = "Select elements from b and c based on the mask operand a.\n\nThis operation's behavior is unspecified if each lane of a is not the all-zeroes or all-ones bit pattern. See the [`Select`] trait's documentation for more information."] + #[doc = "Select elements from b and c based on the mask operand a.\n\nThis operation's behavior is unspecified if a was constructed from signed integer lanes that are neither all-zeroes (integer value 0) nor all-ones (integer value -1). See the [`Select`] trait's documentation for more information."] fn select_f32x16(self, a: mask32x16, b: f32x16, c: f32x16) -> f32x16; #[doc = "Split a vector into two vectors of half the width.\n\nReturns a tuple of (lower half, upper half)."] fn split_f32x16(self, a: f32x16) -> (f32x8, f32x8); @@ -2156,15 +1962,15 @@ pub trait Simd: fn shr_i8x64(self, a: i8x64, shift: u32) -> i8x64; #[doc = "Shift each element right by the corresponding element in another vector.\n\nFor unsigned integers, zeros are shifted in on the left. For signed integers, the sign bit is replicated.\n\nThis operation is not implemented in hardware on all platforms. On WebAssembly, and on x86 platforms without AVX2, this will use a fallback scalar implementation."] fn shrv_i8x64(self, a: i8x64, b: i8x64) -> i8x64; - #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each element is all ones if the corresponding elements are equal, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each logical lane is true if the corresponding elements are equal, and false if not."] fn simd_eq_i8x64(self, a: i8x64, b: i8x64) -> mask8x64; - #[doc = "Compare two vectors element-wise for less than.\n\nReturns a mask where each element is all ones if `a` is less than `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for less than.\n\nReturns a mask where each logical lane is true if `a` is less than `b`, and false if not."] fn simd_lt_i8x64(self, a: i8x64, b: i8x64) -> mask8x64; - #[doc = "Compare two vectors element-wise for less than or equal.\n\nReturns a mask where each element is all ones if `a` is less than or equal to `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for less than or equal.\n\nReturns a mask where each logical lane is true if `a` is less than or equal to `b`, and false if not."] fn simd_le_i8x64(self, a: i8x64, b: i8x64) -> mask8x64; - #[doc = "Compare two vectors element-wise for greater than or equal.\n\nReturns a mask where each element is all ones if `a` is greater than or equal to `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for greater than or equal.\n\nReturns a mask where each logical lane is true if `a` is greater than or equal to `b`, and false if not."] fn simd_ge_i8x64(self, a: i8x64, b: i8x64) -> mask8x64; - #[doc = "Compare two vectors element-wise for greater than.\n\nReturns a mask where each element is all ones if `a` is greater than `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for greater than.\n\nReturns a mask where each logical lane is true if `a` is greater than `b`, and false if not."] fn simd_gt_i8x64(self, a: i8x64, b: i8x64) -> mask8x64; #[doc = "Interleave the lower half elements of two vectors.\n\nFor vectors `[a0, a1, a2, a3]` and `[b0, b1, b2, b3]`, returns `[a0, b0, a1, b1]`.\n\n**Note:** This operation is only useful if you need to discard elements `a2, a3, b2, b3`.\n For fully interleaving two vectors prefer `interleave`,\n which is faster than `zip_low` followed by `zip_high` on some platforms."] fn zip_low_i8x64(self, a: i8x64, b: i8x64) -> i8x64; @@ -2178,7 +1984,7 @@ pub trait Simd: fn interleave_i8x64(self, a: i8x64, b: i8x64) -> (i8x64, i8x64); #[doc = "Deinterleave two vectors.\n\nThe first result contains all even-indexed elements from `a` followed by all even-indexed elements from `b`. The second result contains all odd-indexed elements from `a` followed by all odd-indexed elements from `b`.\n\nThe reverse of this operation is `interleave`.\n\nFor vectors `[a0, b0, a1, b1]` and `[a2, b2, a3, b3]`, returns `([a0, a1, a2, a3], [b0, b1, b2, b3])`."] fn deinterleave_i8x64(self, a: i8x64, b: i8x64) -> (i8x64, i8x64); - #[doc = "Select elements from b and c based on the mask operand a.\n\nThis operation's behavior is unspecified if each lane of a is not the all-zeroes or all-ones bit pattern. See the [`Select`] trait's documentation for more information."] + #[doc = "Select elements from b and c based on the mask operand a.\n\nThis operation's behavior is unspecified if a was constructed from signed integer lanes that are neither all-zeroes (integer value 0) nor all-ones (integer value -1). See the [`Select`] trait's documentation for more information."] fn select_i8x64(self, a: mask8x64, b: i8x64, c: i8x64) -> i8x64; #[doc = "Return the element-wise minimum of two vectors."] fn min_i8x64(self, a: i8x64, b: i8x64) -> i8x64; @@ -2240,15 +2046,15 @@ pub trait Simd: fn shr_u8x64(self, a: u8x64, shift: u32) -> u8x64; #[doc = "Shift each element right by the corresponding element in another vector.\n\nFor unsigned integers, zeros are shifted in on the left. For signed integers, the sign bit is replicated.\n\nThis operation is not implemented in hardware on all platforms. On WebAssembly, and on x86 platforms without AVX2, this will use a fallback scalar implementation."] fn shrv_u8x64(self, a: u8x64, b: u8x64) -> u8x64; - #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each element is all ones if the corresponding elements are equal, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each logical lane is true if the corresponding elements are equal, and false if not."] fn simd_eq_u8x64(self, a: u8x64, b: u8x64) -> mask8x64; - #[doc = "Compare two vectors element-wise for less than.\n\nReturns a mask where each element is all ones if `a` is less than `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for less than.\n\nReturns a mask where each logical lane is true if `a` is less than `b`, and false if not."] fn simd_lt_u8x64(self, a: u8x64, b: u8x64) -> mask8x64; - #[doc = "Compare two vectors element-wise for less than or equal.\n\nReturns a mask where each element is all ones if `a` is less than or equal to `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for less than or equal.\n\nReturns a mask where each logical lane is true if `a` is less than or equal to `b`, and false if not."] fn simd_le_u8x64(self, a: u8x64, b: u8x64) -> mask8x64; - #[doc = "Compare two vectors element-wise for greater than or equal.\n\nReturns a mask where each element is all ones if `a` is greater than or equal to `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for greater than or equal.\n\nReturns a mask where each logical lane is true if `a` is greater than or equal to `b`, and false if not."] fn simd_ge_u8x64(self, a: u8x64, b: u8x64) -> mask8x64; - #[doc = "Compare two vectors element-wise for greater than.\n\nReturns a mask where each element is all ones if `a` is greater than `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for greater than.\n\nReturns a mask where each logical lane is true if `a` is greater than `b`, and false if not."] fn simd_gt_u8x64(self, a: u8x64, b: u8x64) -> mask8x64; #[doc = "Interleave the lower half elements of two vectors.\n\nFor vectors `[a0, a1, a2, a3]` and `[b0, b1, b2, b3]`, returns `[a0, b0, a1, b1]`.\n\n**Note:** This operation is only useful if you need to discard elements `a2, a3, b2, b3`.\n For fully interleaving two vectors prefer `interleave`,\n which is faster than `zip_low` followed by `zip_high` on some platforms."] fn zip_low_u8x64(self, a: u8x64, b: u8x64) -> u8x64; @@ -2262,7 +2068,7 @@ pub trait Simd: fn interleave_u8x64(self, a: u8x64, b: u8x64) -> (u8x64, u8x64); #[doc = "Deinterleave two vectors.\n\nThe first result contains all even-indexed elements from `a` followed by all even-indexed elements from `b`. The second result contains all odd-indexed elements from `a` followed by all odd-indexed elements from `b`.\n\nThe reverse of this operation is `interleave`.\n\nFor vectors `[a0, b0, a1, b1]` and `[a2, b2, a3, b3]`, returns `([a0, a1, a2, a3], [b0, b1, b2, b3])`."] fn deinterleave_u8x64(self, a: u8x64, b: u8x64) -> (u8x64, u8x64); - #[doc = "Select elements from b and c based on the mask operand a.\n\nThis operation's behavior is unspecified if each lane of a is not the all-zeroes or all-ones bit pattern. See the [`Select`] trait's documentation for more information."] + #[doc = "Select elements from b and c based on the mask operand a.\n\nThis operation's behavior is unspecified if a was constructed from signed integer lanes that are neither all-zeroes (integer value 0) nor all-ones (integer value -1). See the [`Select`] trait's documentation for more information."] fn select_u8x64(self, a: mask8x64, b: u8x64, c: u8x64) -> u8x64; #[doc = "Return the element-wise minimum of two vectors."] fn min_u8x64(self, a: u8x64, b: u8x64) -> u8x64; @@ -2276,36 +2082,12 @@ pub trait Simd: fn store_interleaved_128_u8x64(self, a: u8x64, dest: &mut [u8; 64usize]) -> (); #[doc = "Reinterpret the bits of this vector as a vector of `u32` elements.\n\nThe total bit width is preserved; the number of elements changes accordingly."] fn reinterpret_u32_u8x64(self, a: u8x64) -> u32x16; - #[doc = "Create a SIMD vector with all elements set to the given value."] - fn splat_mask8x64(self, val: i8) -> mask8x64; - #[doc = "Create a SIMD vector from an array of the same length."] + #[doc = "Create a SIMD mask with all lanes set from the given boolean value."] + fn splat_mask8x64(self, val: bool) -> mask8x64; + #[doc = "Create a SIMD mask from signed integer mask lanes."] fn load_array_mask8x64(self, val: [i8; 64usize]) -> mask8x64; - #[doc = "Create a SIMD vector from an array of the same length."] - fn load_array_ref_mask8x64(self, val: &[i8; 64usize]) -> mask8x64; - #[doc = "Convert a SIMD vector to an array."] + #[doc = "Convert a SIMD mask to signed integer mask lanes."] fn as_array_mask8x64(self, a: mask8x64) -> [i8; 64usize]; - #[doc = "Project a reference to a SIMD vector to a reference to the equivalent array."] - fn as_array_ref_mask8x64(self, a: &mask8x64) -> &[i8; 64usize]; - #[doc = "Project a mutable reference to a SIMD vector to a mutable reference to the equivalent array."] - fn as_array_mut_mask8x64(self, a: &mut mask8x64) -> &mut [i8; 64usize]; - #[doc = "Store a SIMD vector into an array of the same length."] - fn store_array_mask8x64(self, a: mask8x64, dest: &mut [i8; 64usize]) -> (); - #[doc = "Reinterpret a vector of bytes as a SIMD vector of a given type, with the equivalent byte length."] - fn cvt_from_bytes_mask8x64(self, a: u8x64) -> mask8x64; - #[doc = "Reinterpret a SIMD vector as a vector of bytes, with the equivalent byte length."] - fn cvt_to_bytes_mask8x64(self, a: mask8x64) -> u8x64; - #[doc = "Concatenate `[self, rhs]` and extract `Self::N` elements starting at index `SHIFT`.\n\n`SHIFT` must be within [0, `Self::N`].\n\nThis can be used to implement a \"shift items\" operation by providing all zeroes as one operand. For a left shift, the right-hand side should be all zeroes. For a right shift by `M` items, the left-hand side should be all zeroes, and the shift amount will be `Self::N - M`.\n\nThis can also be used to rotate items within a vector by providing the same vector as both operands.\n\n```text\n\nslide::<1>([a b c d], [e f g h]) == [b c d e]\n\n```"] - fn slide_mask8x64( - self, - a: mask8x64, - b: mask8x64, - ) -> mask8x64; - #[doc = "Like `slide`, but operates independently on each 128-bit block."] - fn slide_within_blocks_mask8x64( - self, - a: mask8x64, - b: mask8x64, - ) -> mask8x64; #[doc = "Compute the logical AND of two masks."] fn and_mask8x64(self, a: mask8x64, b: mask8x64) -> mask8x64; #[doc = "Compute the logical OR of two masks."] @@ -2314,22 +2096,22 @@ pub trait Simd: fn xor_mask8x64(self, a: mask8x64, b: mask8x64) -> mask8x64; #[doc = "Compute the logical NOT of the mask."] fn not_mask8x64(self, a: mask8x64) -> mask8x64; - #[doc = "Select elements from `b` and `c` based on the mask operand `a`.\n\nThis operation's behavior is unspecified if each lane of a is not the all-zeroes or all-ones bit pattern. See the [`Select`] trait's documentation for more information."] + #[doc = "Select elements from `b` and `c` based on the mask operand `a`.\n\nThis operation's behavior is unspecified if a was constructed from signed integer lanes that are neither all-zeroes (integer value 0) nor all-ones (integer value -1). See the [`Select`] trait's documentation for more information."] fn select_mask8x64( self, a: mask8x64, b: mask8x64, c: mask8x64, ) -> mask8x64; - #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each element is all ones if the corresponding elements are equal, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each logical lane is true if the corresponding elements are equal, and false if not."] fn simd_eq_mask8x64(self, a: mask8x64, b: mask8x64) -> mask8x64; - #[doc = "Returns true if any elements in this mask are true (all ones).\n\nBehavior on mask elements that are not all zeroes or all ones is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent if mask elements are not all zeroes or all ones. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for mask elements that are not all zeroes or all ones. That behavior may not match the behavior of this operation."] + #[doc = "Returns true if any logical lanes in this mask are true.\n\nMasks may be converted to and from signed integer lane arrays for compatibility with older APIs. For those conversions, false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1).\n\nBehavior on masks constructed from any other integer bit pattern is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent for such non-canonical masks. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for non-canonical masks. That behavior may not match the behavior of this operation."] fn any_true_mask8x64(self, a: mask8x64) -> bool; - #[doc = "Returns true if all elements in this mask are true (all ones).\n\nBehavior on mask elements that are not all zeroes or all ones is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent if mask elements are not all zeroes or all ones. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for mask elements that are not all zeroes or all ones. That behavior may not match the behavior of this operation."] + #[doc = "Returns true if all logical lanes in this mask are true.\n\nMasks may be converted to and from signed integer lane arrays for compatibility with older APIs. For those conversions, false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1).\n\nBehavior on masks constructed from any other integer bit pattern is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent for such non-canonical masks. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for non-canonical masks. That behavior may not match the behavior of this operation."] fn all_true_mask8x64(self, a: mask8x64) -> bool; - #[doc = "Returns true if any elements in this mask are false (all zeroes).\n\nThis is logically equivalent to `!all_true`, but may be faster.\n\nBehavior on mask elements that are not all zeroes or all ones is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent if mask elements are not all zeroes or all ones. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for mask elements that are not all zeroes or all ones. That behavior may not match the behavior of this operation."] + #[doc = "Returns true if any logical lanes in this mask are false.\n\nThis is logically equivalent to `!all_true`, but may be faster.\n\nMasks may be converted to and from signed integer lane arrays for compatibility with older APIs. For those conversions, false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1).\n\nBehavior on masks constructed from any other integer bit pattern is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent for such non-canonical masks. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for non-canonical masks. That behavior may not match the behavior of this operation."] fn any_false_mask8x64(self, a: mask8x64) -> bool; - #[doc = "Returns true if all elements in this mask are false (all zeroes).\n\nThis is logically equivalent to `!any_true`, but may be faster.\n\nBehavior on mask elements that are not all zeroes or all ones is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent if mask elements are not all zeroes or all ones. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for mask elements that are not all zeroes or all ones. That behavior may not match the behavior of this operation."] + #[doc = "Returns true if all logical lanes in this mask are false.\n\nThis is logically equivalent to `!any_true`, but may be faster.\n\nMasks may be converted to and from signed integer lane arrays for compatibility with older APIs. For those conversions, false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1).\n\nBehavior on masks constructed from any other integer bit pattern is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent for such non-canonical masks. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for non-canonical masks. That behavior may not match the behavior of this operation."] fn all_false_mask8x64(self, a: mask8x64) -> bool; #[doc = "Split a vector into two vectors of half the width.\n\nReturns a tuple of (lower half, upper half)."] fn split_mask8x64(self, a: mask8x64) -> (mask8x32, mask8x32); @@ -2381,15 +2163,15 @@ pub trait Simd: fn shr_i16x32(self, a: i16x32, shift: u32) -> i16x32; #[doc = "Shift each element right by the corresponding element in another vector.\n\nFor unsigned integers, zeros are shifted in on the left. For signed integers, the sign bit is replicated.\n\nThis operation is not implemented in hardware on all platforms. On WebAssembly, and on x86 platforms without AVX2, this will use a fallback scalar implementation."] fn shrv_i16x32(self, a: i16x32, b: i16x32) -> i16x32; - #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each element is all ones if the corresponding elements are equal, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each logical lane is true if the corresponding elements are equal, and false if not."] fn simd_eq_i16x32(self, a: i16x32, b: i16x32) -> mask16x32; - #[doc = "Compare two vectors element-wise for less than.\n\nReturns a mask where each element is all ones if `a` is less than `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for less than.\n\nReturns a mask where each logical lane is true if `a` is less than `b`, and false if not."] fn simd_lt_i16x32(self, a: i16x32, b: i16x32) -> mask16x32; - #[doc = "Compare two vectors element-wise for less than or equal.\n\nReturns a mask where each element is all ones if `a` is less than or equal to `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for less than or equal.\n\nReturns a mask where each logical lane is true if `a` is less than or equal to `b`, and false if not."] fn simd_le_i16x32(self, a: i16x32, b: i16x32) -> mask16x32; - #[doc = "Compare two vectors element-wise for greater than or equal.\n\nReturns a mask where each element is all ones if `a` is greater than or equal to `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for greater than or equal.\n\nReturns a mask where each logical lane is true if `a` is greater than or equal to `b`, and false if not."] fn simd_ge_i16x32(self, a: i16x32, b: i16x32) -> mask16x32; - #[doc = "Compare two vectors element-wise for greater than.\n\nReturns a mask where each element is all ones if `a` is greater than `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for greater than.\n\nReturns a mask where each logical lane is true if `a` is greater than `b`, and false if not."] fn simd_gt_i16x32(self, a: i16x32, b: i16x32) -> mask16x32; #[doc = "Interleave the lower half elements of two vectors.\n\nFor vectors `[a0, a1, a2, a3]` and `[b0, b1, b2, b3]`, returns `[a0, b0, a1, b1]`.\n\n**Note:** This operation is only useful if you need to discard elements `a2, a3, b2, b3`.\n For fully interleaving two vectors prefer `interleave`,\n which is faster than `zip_low` followed by `zip_high` on some platforms."] fn zip_low_i16x32(self, a: i16x32, b: i16x32) -> i16x32; @@ -2403,7 +2185,7 @@ pub trait Simd: fn interleave_i16x32(self, a: i16x32, b: i16x32) -> (i16x32, i16x32); #[doc = "Deinterleave two vectors.\n\nThe first result contains all even-indexed elements from `a` followed by all even-indexed elements from `b`. The second result contains all odd-indexed elements from `a` followed by all odd-indexed elements from `b`.\n\nThe reverse of this operation is `interleave`.\n\nFor vectors `[a0, b0, a1, b1]` and `[a2, b2, a3, b3]`, returns `([a0, a1, a2, a3], [b0, b1, b2, b3])`."] fn deinterleave_i16x32(self, a: i16x32, b: i16x32) -> (i16x32, i16x32); - #[doc = "Select elements from b and c based on the mask operand a.\n\nThis operation's behavior is unspecified if each lane of a is not the all-zeroes or all-ones bit pattern. See the [`Select`] trait's documentation for more information."] + #[doc = "Select elements from b and c based on the mask operand a.\n\nThis operation's behavior is unspecified if a was constructed from signed integer lanes that are neither all-zeroes (integer value 0) nor all-ones (integer value -1). See the [`Select`] trait's documentation for more information."] fn select_i16x32(self, a: mask16x32, b: i16x32, c: i16x32) -> i16x32; #[doc = "Return the element-wise minimum of two vectors."] fn min_i16x32(self, a: i16x32, b: i16x32) -> i16x32; @@ -2465,15 +2247,15 @@ pub trait Simd: fn shr_u16x32(self, a: u16x32, shift: u32) -> u16x32; #[doc = "Shift each element right by the corresponding element in another vector.\n\nFor unsigned integers, zeros are shifted in on the left. For signed integers, the sign bit is replicated.\n\nThis operation is not implemented in hardware on all platforms. On WebAssembly, and on x86 platforms without AVX2, this will use a fallback scalar implementation."] fn shrv_u16x32(self, a: u16x32, b: u16x32) -> u16x32; - #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each element is all ones if the corresponding elements are equal, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each logical lane is true if the corresponding elements are equal, and false if not."] fn simd_eq_u16x32(self, a: u16x32, b: u16x32) -> mask16x32; - #[doc = "Compare two vectors element-wise for less than.\n\nReturns a mask where each element is all ones if `a` is less than `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for less than.\n\nReturns a mask where each logical lane is true if `a` is less than `b`, and false if not."] fn simd_lt_u16x32(self, a: u16x32, b: u16x32) -> mask16x32; - #[doc = "Compare two vectors element-wise for less than or equal.\n\nReturns a mask where each element is all ones if `a` is less than or equal to `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for less than or equal.\n\nReturns a mask where each logical lane is true if `a` is less than or equal to `b`, and false if not."] fn simd_le_u16x32(self, a: u16x32, b: u16x32) -> mask16x32; - #[doc = "Compare two vectors element-wise for greater than or equal.\n\nReturns a mask where each element is all ones if `a` is greater than or equal to `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for greater than or equal.\n\nReturns a mask where each logical lane is true if `a` is greater than or equal to `b`, and false if not."] fn simd_ge_u16x32(self, a: u16x32, b: u16x32) -> mask16x32; - #[doc = "Compare two vectors element-wise for greater than.\n\nReturns a mask where each element is all ones if `a` is greater than `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for greater than.\n\nReturns a mask where each logical lane is true if `a` is greater than `b`, and false if not."] fn simd_gt_u16x32(self, a: u16x32, b: u16x32) -> mask16x32; #[doc = "Interleave the lower half elements of two vectors.\n\nFor vectors `[a0, a1, a2, a3]` and `[b0, b1, b2, b3]`, returns `[a0, b0, a1, b1]`.\n\n**Note:** This operation is only useful if you need to discard elements `a2, a3, b2, b3`.\n For fully interleaving two vectors prefer `interleave`,\n which is faster than `zip_low` followed by `zip_high` on some platforms."] fn zip_low_u16x32(self, a: u16x32, b: u16x32) -> u16x32; @@ -2487,7 +2269,7 @@ pub trait Simd: fn interleave_u16x32(self, a: u16x32, b: u16x32) -> (u16x32, u16x32); #[doc = "Deinterleave two vectors.\n\nThe first result contains all even-indexed elements from `a` followed by all even-indexed elements from `b`. The second result contains all odd-indexed elements from `a` followed by all odd-indexed elements from `b`.\n\nThe reverse of this operation is `interleave`.\n\nFor vectors `[a0, b0, a1, b1]` and `[a2, b2, a3, b3]`, returns `([a0, a1, a2, a3], [b0, b1, b2, b3])`."] fn deinterleave_u16x32(self, a: u16x32, b: u16x32) -> (u16x32, u16x32); - #[doc = "Select elements from b and c based on the mask operand a.\n\nThis operation's behavior is unspecified if each lane of a is not the all-zeroes or all-ones bit pattern. See the [`Select`] trait's documentation for more information."] + #[doc = "Select elements from b and c based on the mask operand a.\n\nThis operation's behavior is unspecified if a was constructed from signed integer lanes that are neither all-zeroes (integer value 0) nor all-ones (integer value -1). See the [`Select`] trait's documentation for more information."] fn select_u16x32(self, a: mask16x32, b: u16x32, c: u16x32) -> u16x32; #[doc = "Return the element-wise minimum of two vectors."] fn min_u16x32(self, a: u16x32, b: u16x32) -> u16x32; @@ -2505,36 +2287,12 @@ pub trait Simd: fn reinterpret_u8_u16x32(self, a: u16x32) -> u8x64; #[doc = "Reinterpret the bits of this vector as a vector of `u32` elements.\n\nThe total bit width is preserved; the number of elements changes accordingly."] fn reinterpret_u32_u16x32(self, a: u16x32) -> u32x16; - #[doc = "Create a SIMD vector with all elements set to the given value."] - fn splat_mask16x32(self, val: i16) -> mask16x32; - #[doc = "Create a SIMD vector from an array of the same length."] + #[doc = "Create a SIMD mask with all lanes set from the given boolean value."] + fn splat_mask16x32(self, val: bool) -> mask16x32; + #[doc = "Create a SIMD mask from signed integer mask lanes."] fn load_array_mask16x32(self, val: [i16; 32usize]) -> mask16x32; - #[doc = "Create a SIMD vector from an array of the same length."] - fn load_array_ref_mask16x32(self, val: &[i16; 32usize]) -> mask16x32; - #[doc = "Convert a SIMD vector to an array."] + #[doc = "Convert a SIMD mask to signed integer mask lanes."] fn as_array_mask16x32(self, a: mask16x32) -> [i16; 32usize]; - #[doc = "Project a reference to a SIMD vector to a reference to the equivalent array."] - fn as_array_ref_mask16x32(self, a: &mask16x32) -> &[i16; 32usize]; - #[doc = "Project a mutable reference to a SIMD vector to a mutable reference to the equivalent array."] - fn as_array_mut_mask16x32(self, a: &mut mask16x32) -> &mut [i16; 32usize]; - #[doc = "Store a SIMD vector into an array of the same length."] - fn store_array_mask16x32(self, a: mask16x32, dest: &mut [i16; 32usize]) -> (); - #[doc = "Reinterpret a vector of bytes as a SIMD vector of a given type, with the equivalent byte length."] - fn cvt_from_bytes_mask16x32(self, a: u8x64) -> mask16x32; - #[doc = "Reinterpret a SIMD vector as a vector of bytes, with the equivalent byte length."] - fn cvt_to_bytes_mask16x32(self, a: mask16x32) -> u8x64; - #[doc = "Concatenate `[self, rhs]` and extract `Self::N` elements starting at index `SHIFT`.\n\n`SHIFT` must be within [0, `Self::N`].\n\nThis can be used to implement a \"shift items\" operation by providing all zeroes as one operand. For a left shift, the right-hand side should be all zeroes. For a right shift by `M` items, the left-hand side should be all zeroes, and the shift amount will be `Self::N - M`.\n\nThis can also be used to rotate items within a vector by providing the same vector as both operands.\n\n```text\n\nslide::<1>([a b c d], [e f g h]) == [b c d e]\n\n```"] - fn slide_mask16x32( - self, - a: mask16x32, - b: mask16x32, - ) -> mask16x32; - #[doc = "Like `slide`, but operates independently on each 128-bit block."] - fn slide_within_blocks_mask16x32( - self, - a: mask16x32, - b: mask16x32, - ) -> mask16x32; #[doc = "Compute the logical AND of two masks."] fn and_mask16x32(self, a: mask16x32, b: mask16x32) -> mask16x32; #[doc = "Compute the logical OR of two masks."] @@ -2543,22 +2301,22 @@ pub trait Simd: fn xor_mask16x32(self, a: mask16x32, b: mask16x32) -> mask16x32; #[doc = "Compute the logical NOT of the mask."] fn not_mask16x32(self, a: mask16x32) -> mask16x32; - #[doc = "Select elements from `b` and `c` based on the mask operand `a`.\n\nThis operation's behavior is unspecified if each lane of a is not the all-zeroes or all-ones bit pattern. See the [`Select`] trait's documentation for more information."] + #[doc = "Select elements from `b` and `c` based on the mask operand `a`.\n\nThis operation's behavior is unspecified if a was constructed from signed integer lanes that are neither all-zeroes (integer value 0) nor all-ones (integer value -1). See the [`Select`] trait's documentation for more information."] fn select_mask16x32( self, a: mask16x32, b: mask16x32, c: mask16x32, ) -> mask16x32; - #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each element is all ones if the corresponding elements are equal, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each logical lane is true if the corresponding elements are equal, and false if not."] fn simd_eq_mask16x32(self, a: mask16x32, b: mask16x32) -> mask16x32; - #[doc = "Returns true if any elements in this mask are true (all ones).\n\nBehavior on mask elements that are not all zeroes or all ones is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent if mask elements are not all zeroes or all ones. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for mask elements that are not all zeroes or all ones. That behavior may not match the behavior of this operation."] + #[doc = "Returns true if any logical lanes in this mask are true.\n\nMasks may be converted to and from signed integer lane arrays for compatibility with older APIs. For those conversions, false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1).\n\nBehavior on masks constructed from any other integer bit pattern is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent for such non-canonical masks. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for non-canonical masks. That behavior may not match the behavior of this operation."] fn any_true_mask16x32(self, a: mask16x32) -> bool; - #[doc = "Returns true if all elements in this mask are true (all ones).\n\nBehavior on mask elements that are not all zeroes or all ones is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent if mask elements are not all zeroes or all ones. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for mask elements that are not all zeroes or all ones. That behavior may not match the behavior of this operation."] + #[doc = "Returns true if all logical lanes in this mask are true.\n\nMasks may be converted to and from signed integer lane arrays for compatibility with older APIs. For those conversions, false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1).\n\nBehavior on masks constructed from any other integer bit pattern is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent for such non-canonical masks. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for non-canonical masks. That behavior may not match the behavior of this operation."] fn all_true_mask16x32(self, a: mask16x32) -> bool; - #[doc = "Returns true if any elements in this mask are false (all zeroes).\n\nThis is logically equivalent to `!all_true`, but may be faster.\n\nBehavior on mask elements that are not all zeroes or all ones is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent if mask elements are not all zeroes or all ones. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for mask elements that are not all zeroes or all ones. That behavior may not match the behavior of this operation."] + #[doc = "Returns true if any logical lanes in this mask are false.\n\nThis is logically equivalent to `!all_true`, but may be faster.\n\nMasks may be converted to and from signed integer lane arrays for compatibility with older APIs. For those conversions, false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1).\n\nBehavior on masks constructed from any other integer bit pattern is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent for such non-canonical masks. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for non-canonical masks. That behavior may not match the behavior of this operation."] fn any_false_mask16x32(self, a: mask16x32) -> bool; - #[doc = "Returns true if all elements in this mask are false (all zeroes).\n\nThis is logically equivalent to `!any_true`, but may be faster.\n\nBehavior on mask elements that are not all zeroes or all ones is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent if mask elements are not all zeroes or all ones. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for mask elements that are not all zeroes or all ones. That behavior may not match the behavior of this operation."] + #[doc = "Returns true if all logical lanes in this mask are false.\n\nThis is logically equivalent to `!any_true`, but may be faster.\n\nMasks may be converted to and from signed integer lane arrays for compatibility with older APIs. For those conversions, false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1).\n\nBehavior on masks constructed from any other integer bit pattern is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent for such non-canonical masks. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for non-canonical masks. That behavior may not match the behavior of this operation."] fn all_false_mask16x32(self, a: mask16x32) -> bool; #[doc = "Split a vector into two vectors of half the width.\n\nReturns a tuple of (lower half, upper half)."] fn split_mask16x32(self, a: mask16x32) -> (mask16x16, mask16x16); @@ -2610,15 +2368,15 @@ pub trait Simd: fn shr_i32x16(self, a: i32x16, shift: u32) -> i32x16; #[doc = "Shift each element right by the corresponding element in another vector.\n\nFor unsigned integers, zeros are shifted in on the left. For signed integers, the sign bit is replicated.\n\nThis operation is not implemented in hardware on all platforms. On WebAssembly, and on x86 platforms without AVX2, this will use a fallback scalar implementation."] fn shrv_i32x16(self, a: i32x16, b: i32x16) -> i32x16; - #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each element is all ones if the corresponding elements are equal, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each logical lane is true if the corresponding elements are equal, and false if not."] fn simd_eq_i32x16(self, a: i32x16, b: i32x16) -> mask32x16; - #[doc = "Compare two vectors element-wise for less than.\n\nReturns a mask where each element is all ones if `a` is less than `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for less than.\n\nReturns a mask where each logical lane is true if `a` is less than `b`, and false if not."] fn simd_lt_i32x16(self, a: i32x16, b: i32x16) -> mask32x16; - #[doc = "Compare two vectors element-wise for less than or equal.\n\nReturns a mask where each element is all ones if `a` is less than or equal to `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for less than or equal.\n\nReturns a mask where each logical lane is true if `a` is less than or equal to `b`, and false if not."] fn simd_le_i32x16(self, a: i32x16, b: i32x16) -> mask32x16; - #[doc = "Compare two vectors element-wise for greater than or equal.\n\nReturns a mask where each element is all ones if `a` is greater than or equal to `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for greater than or equal.\n\nReturns a mask where each logical lane is true if `a` is greater than or equal to `b`, and false if not."] fn simd_ge_i32x16(self, a: i32x16, b: i32x16) -> mask32x16; - #[doc = "Compare two vectors element-wise for greater than.\n\nReturns a mask where each element is all ones if `a` is greater than `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for greater than.\n\nReturns a mask where each logical lane is true if `a` is greater than `b`, and false if not."] fn simd_gt_i32x16(self, a: i32x16, b: i32x16) -> mask32x16; #[doc = "Interleave the lower half elements of two vectors.\n\nFor vectors `[a0, a1, a2, a3]` and `[b0, b1, b2, b3]`, returns `[a0, b0, a1, b1]`.\n\n**Note:** This operation is only useful if you need to discard elements `a2, a3, b2, b3`.\n For fully interleaving two vectors prefer `interleave`,\n which is faster than `zip_low` followed by `zip_high` on some platforms."] fn zip_low_i32x16(self, a: i32x16, b: i32x16) -> i32x16; @@ -2632,7 +2390,7 @@ pub trait Simd: fn interleave_i32x16(self, a: i32x16, b: i32x16) -> (i32x16, i32x16); #[doc = "Deinterleave two vectors.\n\nThe first result contains all even-indexed elements from `a` followed by all even-indexed elements from `b`. The second result contains all odd-indexed elements from `a` followed by all odd-indexed elements from `b`.\n\nThe reverse of this operation is `interleave`.\n\nFor vectors `[a0, b0, a1, b1]` and `[a2, b2, a3, b3]`, returns `([a0, a1, a2, a3], [b0, b1, b2, b3])`."] fn deinterleave_i32x16(self, a: i32x16, b: i32x16) -> (i32x16, i32x16); - #[doc = "Select elements from b and c based on the mask operand a.\n\nThis operation's behavior is unspecified if each lane of a is not the all-zeroes or all-ones bit pattern. See the [`Select`] trait's documentation for more information."] + #[doc = "Select elements from b and c based on the mask operand a.\n\nThis operation's behavior is unspecified if a was constructed from signed integer lanes that are neither all-zeroes (integer value 0) nor all-ones (integer value -1). See the [`Select`] trait's documentation for more information."] fn select_i32x16(self, a: mask32x16, b: i32x16, c: i32x16) -> i32x16; #[doc = "Return the element-wise minimum of two vectors."] fn min_i32x16(self, a: i32x16, b: i32x16) -> i32x16; @@ -2696,15 +2454,15 @@ pub trait Simd: fn shr_u32x16(self, a: u32x16, shift: u32) -> u32x16; #[doc = "Shift each element right by the corresponding element in another vector.\n\nFor unsigned integers, zeros are shifted in on the left. For signed integers, the sign bit is replicated.\n\nThis operation is not implemented in hardware on all platforms. On WebAssembly, and on x86 platforms without AVX2, this will use a fallback scalar implementation."] fn shrv_u32x16(self, a: u32x16, b: u32x16) -> u32x16; - #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each element is all ones if the corresponding elements are equal, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each logical lane is true if the corresponding elements are equal, and false if not."] fn simd_eq_u32x16(self, a: u32x16, b: u32x16) -> mask32x16; - #[doc = "Compare two vectors element-wise for less than.\n\nReturns a mask where each element is all ones if `a` is less than `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for less than.\n\nReturns a mask where each logical lane is true if `a` is less than `b`, and false if not."] fn simd_lt_u32x16(self, a: u32x16, b: u32x16) -> mask32x16; - #[doc = "Compare two vectors element-wise for less than or equal.\n\nReturns a mask where each element is all ones if `a` is less than or equal to `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for less than or equal.\n\nReturns a mask where each logical lane is true if `a` is less than or equal to `b`, and false if not."] fn simd_le_u32x16(self, a: u32x16, b: u32x16) -> mask32x16; - #[doc = "Compare two vectors element-wise for greater than or equal.\n\nReturns a mask where each element is all ones if `a` is greater than or equal to `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for greater than or equal.\n\nReturns a mask where each logical lane is true if `a` is greater than or equal to `b`, and false if not."] fn simd_ge_u32x16(self, a: u32x16, b: u32x16) -> mask32x16; - #[doc = "Compare two vectors element-wise for greater than.\n\nReturns a mask where each element is all ones if `a` is greater than `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for greater than.\n\nReturns a mask where each logical lane is true if `a` is greater than `b`, and false if not."] fn simd_gt_u32x16(self, a: u32x16, b: u32x16) -> mask32x16; #[doc = "Interleave the lower half elements of two vectors.\n\nFor vectors `[a0, a1, a2, a3]` and `[b0, b1, b2, b3]`, returns `[a0, b0, a1, b1]`.\n\n**Note:** This operation is only useful if you need to discard elements `a2, a3, b2, b3`.\n For fully interleaving two vectors prefer `interleave`,\n which is faster than `zip_low` followed by `zip_high` on some platforms."] fn zip_low_u32x16(self, a: u32x16, b: u32x16) -> u32x16; @@ -2718,7 +2476,7 @@ pub trait Simd: fn interleave_u32x16(self, a: u32x16, b: u32x16) -> (u32x16, u32x16); #[doc = "Deinterleave two vectors.\n\nThe first result contains all even-indexed elements from `a` followed by all even-indexed elements from `b`. The second result contains all odd-indexed elements from `a` followed by all odd-indexed elements from `b`.\n\nThe reverse of this operation is `interleave`.\n\nFor vectors `[a0, b0, a1, b1]` and `[a2, b2, a3, b3]`, returns `([a0, a1, a2, a3], [b0, b1, b2, b3])`."] fn deinterleave_u32x16(self, a: u32x16, b: u32x16) -> (u32x16, u32x16); - #[doc = "Select elements from b and c based on the mask operand a.\n\nThis operation's behavior is unspecified if each lane of a is not the all-zeroes or all-ones bit pattern. See the [`Select`] trait's documentation for more information."] + #[doc = "Select elements from b and c based on the mask operand a.\n\nThis operation's behavior is unspecified if a was constructed from signed integer lanes that are neither all-zeroes (integer value 0) nor all-ones (integer value -1). See the [`Select`] trait's documentation for more information."] fn select_u32x16(self, a: mask32x16, b: u32x16, c: u32x16) -> u32x16; #[doc = "Return the element-wise minimum of two vectors."] fn min_u32x16(self, a: u32x16, b: u32x16) -> u32x16; @@ -2734,36 +2492,12 @@ pub trait Simd: fn reinterpret_u8_u32x16(self, a: u32x16) -> u8x64; #[doc = "Convert each unsigned 32-bit integer element to a floating-point value.\n\nValues that cannot be exactly represented are rounded to the nearest representable value."] fn cvt_f32_u32x16(self, a: u32x16) -> f32x16; - #[doc = "Create a SIMD vector with all elements set to the given value."] - fn splat_mask32x16(self, val: i32) -> mask32x16; - #[doc = "Create a SIMD vector from an array of the same length."] + #[doc = "Create a SIMD mask with all lanes set from the given boolean value."] + fn splat_mask32x16(self, val: bool) -> mask32x16; + #[doc = "Create a SIMD mask from signed integer mask lanes."] fn load_array_mask32x16(self, val: [i32; 16usize]) -> mask32x16; - #[doc = "Create a SIMD vector from an array of the same length."] - fn load_array_ref_mask32x16(self, val: &[i32; 16usize]) -> mask32x16; - #[doc = "Convert a SIMD vector to an array."] + #[doc = "Convert a SIMD mask to signed integer mask lanes."] fn as_array_mask32x16(self, a: mask32x16) -> [i32; 16usize]; - #[doc = "Project a reference to a SIMD vector to a reference to the equivalent array."] - fn as_array_ref_mask32x16(self, a: &mask32x16) -> &[i32; 16usize]; - #[doc = "Project a mutable reference to a SIMD vector to a mutable reference to the equivalent array."] - fn as_array_mut_mask32x16(self, a: &mut mask32x16) -> &mut [i32; 16usize]; - #[doc = "Store a SIMD vector into an array of the same length."] - fn store_array_mask32x16(self, a: mask32x16, dest: &mut [i32; 16usize]) -> (); - #[doc = "Reinterpret a vector of bytes as a SIMD vector of a given type, with the equivalent byte length."] - fn cvt_from_bytes_mask32x16(self, a: u8x64) -> mask32x16; - #[doc = "Reinterpret a SIMD vector as a vector of bytes, with the equivalent byte length."] - fn cvt_to_bytes_mask32x16(self, a: mask32x16) -> u8x64; - #[doc = "Concatenate `[self, rhs]` and extract `Self::N` elements starting at index `SHIFT`.\n\n`SHIFT` must be within [0, `Self::N`].\n\nThis can be used to implement a \"shift items\" operation by providing all zeroes as one operand. For a left shift, the right-hand side should be all zeroes. For a right shift by `M` items, the left-hand side should be all zeroes, and the shift amount will be `Self::N - M`.\n\nThis can also be used to rotate items within a vector by providing the same vector as both operands.\n\n```text\n\nslide::<1>([a b c d], [e f g h]) == [b c d e]\n\n```"] - fn slide_mask32x16( - self, - a: mask32x16, - b: mask32x16, - ) -> mask32x16; - #[doc = "Like `slide`, but operates independently on each 128-bit block."] - fn slide_within_blocks_mask32x16( - self, - a: mask32x16, - b: mask32x16, - ) -> mask32x16; #[doc = "Compute the logical AND of two masks."] fn and_mask32x16(self, a: mask32x16, b: mask32x16) -> mask32x16; #[doc = "Compute the logical OR of two masks."] @@ -2772,22 +2506,22 @@ pub trait Simd: fn xor_mask32x16(self, a: mask32x16, b: mask32x16) -> mask32x16; #[doc = "Compute the logical NOT of the mask."] fn not_mask32x16(self, a: mask32x16) -> mask32x16; - #[doc = "Select elements from `b` and `c` based on the mask operand `a`.\n\nThis operation's behavior is unspecified if each lane of a is not the all-zeroes or all-ones bit pattern. See the [`Select`] trait's documentation for more information."] + #[doc = "Select elements from `b` and `c` based on the mask operand `a`.\n\nThis operation's behavior is unspecified if a was constructed from signed integer lanes that are neither all-zeroes (integer value 0) nor all-ones (integer value -1). See the [`Select`] trait's documentation for more information."] fn select_mask32x16( self, a: mask32x16, b: mask32x16, c: mask32x16, ) -> mask32x16; - #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each element is all ones if the corresponding elements are equal, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each logical lane is true if the corresponding elements are equal, and false if not."] fn simd_eq_mask32x16(self, a: mask32x16, b: mask32x16) -> mask32x16; - #[doc = "Returns true if any elements in this mask are true (all ones).\n\nBehavior on mask elements that are not all zeroes or all ones is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent if mask elements are not all zeroes or all ones. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for mask elements that are not all zeroes or all ones. That behavior may not match the behavior of this operation."] + #[doc = "Returns true if any logical lanes in this mask are true.\n\nMasks may be converted to and from signed integer lane arrays for compatibility with older APIs. For those conversions, false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1).\n\nBehavior on masks constructed from any other integer bit pattern is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent for such non-canonical masks. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for non-canonical masks. That behavior may not match the behavior of this operation."] fn any_true_mask32x16(self, a: mask32x16) -> bool; - #[doc = "Returns true if all elements in this mask are true (all ones).\n\nBehavior on mask elements that are not all zeroes or all ones is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent if mask elements are not all zeroes or all ones. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for mask elements that are not all zeroes or all ones. That behavior may not match the behavior of this operation."] + #[doc = "Returns true if all logical lanes in this mask are true.\n\nMasks may be converted to and from signed integer lane arrays for compatibility with older APIs. For those conversions, false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1).\n\nBehavior on masks constructed from any other integer bit pattern is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent for such non-canonical masks. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for non-canonical masks. That behavior may not match the behavior of this operation."] fn all_true_mask32x16(self, a: mask32x16) -> bool; - #[doc = "Returns true if any elements in this mask are false (all zeroes).\n\nThis is logically equivalent to `!all_true`, but may be faster.\n\nBehavior on mask elements that are not all zeroes or all ones is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent if mask elements are not all zeroes or all ones. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for mask elements that are not all zeroes or all ones. That behavior may not match the behavior of this operation."] + #[doc = "Returns true if any logical lanes in this mask are false.\n\nThis is logically equivalent to `!all_true`, but may be faster.\n\nMasks may be converted to and from signed integer lane arrays for compatibility with older APIs. For those conversions, false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1).\n\nBehavior on masks constructed from any other integer bit pattern is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent for such non-canonical masks. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for non-canonical masks. That behavior may not match the behavior of this operation."] fn any_false_mask32x16(self, a: mask32x16) -> bool; - #[doc = "Returns true if all elements in this mask are false (all zeroes).\n\nThis is logically equivalent to `!any_true`, but may be faster.\n\nBehavior on mask elements that are not all zeroes or all ones is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent if mask elements are not all zeroes or all ones. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for mask elements that are not all zeroes or all ones. That behavior may not match the behavior of this operation."] + #[doc = "Returns true if all logical lanes in this mask are false.\n\nThis is logically equivalent to `!any_true`, but may be faster.\n\nMasks may be converted to and from signed integer lane arrays for compatibility with older APIs. For those conversions, false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1).\n\nBehavior on masks constructed from any other integer bit pattern is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent for such non-canonical masks. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for non-canonical masks. That behavior may not match the behavior of this operation."] fn all_false_mask32x16(self, a: mask32x16) -> bool; #[doc = "Split a vector into two vectors of half the width.\n\nReturns a tuple of (lower half, upper half)."] fn split_mask32x16(self, a: mask32x16) -> (mask32x8, mask32x8); @@ -2833,15 +2567,15 @@ pub trait Simd: fn div_f64x8(self, a: f64x8, b: f64x8) -> f64x8; #[doc = "Return a vector with the magnitude of `a` and the sign of `b` for each element.\n\nThis operation copies the sign bit, so if an input element is NaN, the output element will be a NaN with the same payload and a copied sign bit."] fn copysign_f64x8(self, a: f64x8, b: f64x8) -> f64x8; - #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each element is all ones if the corresponding elements are equal, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each logical lane is true if the corresponding elements are equal, and false if not."] fn simd_eq_f64x8(self, a: f64x8, b: f64x8) -> mask64x8; - #[doc = "Compare two vectors element-wise for less than.\n\nReturns a mask where each element is all ones if `a` is less than `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for less than.\n\nReturns a mask where each logical lane is true if `a` is less than `b`, and false if not."] fn simd_lt_f64x8(self, a: f64x8, b: f64x8) -> mask64x8; - #[doc = "Compare two vectors element-wise for less than or equal.\n\nReturns a mask where each element is all ones if `a` is less than or equal to `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for less than or equal.\n\nReturns a mask where each logical lane is true if `a` is less than or equal to `b`, and false if not."] fn simd_le_f64x8(self, a: f64x8, b: f64x8) -> mask64x8; - #[doc = "Compare two vectors element-wise for greater than or equal.\n\nReturns a mask where each element is all ones if `a` is greater than or equal to `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for greater than or equal.\n\nReturns a mask where each logical lane is true if `a` is greater than or equal to `b`, and false if not."] fn simd_ge_f64x8(self, a: f64x8, b: f64x8) -> mask64x8; - #[doc = "Compare two vectors element-wise for greater than.\n\nReturns a mask where each element is all ones if `a` is greater than `b`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for greater than.\n\nReturns a mask where each logical lane is true if `a` is greater than `b`, and false if not."] fn simd_gt_f64x8(self, a: f64x8, b: f64x8) -> mask64x8; #[doc = "Interleave the lower half elements of two vectors.\n\nFor vectors `[a0, a1, a2, a3]` and `[b0, b1, b2, b3]`, returns `[a0, b0, a1, b1]`.\n\n**Note:** This operation is only useful if you need to discard elements `a2, a3, b2, b3`.\n For fully interleaving two vectors prefer `interleave`,\n which is faster than `zip_low` followed by `zip_high` on some platforms."] fn zip_low_f64x8(self, a: f64x8, b: f64x8) -> f64x8; @@ -2877,42 +2611,18 @@ pub trait Simd: fn fract_f64x8(self, a: f64x8) -> f64x8; #[doc = "Return the integer part of each element, rounding towards zero."] fn trunc_f64x8(self, a: f64x8) -> f64x8; - #[doc = "Select elements from b and c based on the mask operand a.\n\nThis operation's behavior is unspecified if each lane of a is not the all-zeroes or all-ones bit pattern. See the [`Select`] trait's documentation for more information."] + #[doc = "Select elements from b and c based on the mask operand a.\n\nThis operation's behavior is unspecified if a was constructed from signed integer lanes that are neither all-zeroes (integer value 0) nor all-ones (integer value -1). See the [`Select`] trait's documentation for more information."] fn select_f64x8(self, a: mask64x8, b: f64x8, c: f64x8) -> f64x8; #[doc = "Split a vector into two vectors of half the width.\n\nReturns a tuple of (lower half, upper half)."] fn split_f64x8(self, a: f64x8) -> (f64x4, f64x4); #[doc = "Reinterpret the bits of this vector as a vector of `f32` elements.\n\nThe number of elements in the result is twice that of the input."] fn reinterpret_f32_f64x8(self, a: f64x8) -> f32x16; - #[doc = "Create a SIMD vector with all elements set to the given value."] - fn splat_mask64x8(self, val: i64) -> mask64x8; - #[doc = "Create a SIMD vector from an array of the same length."] + #[doc = "Create a SIMD mask with all lanes set from the given boolean value."] + fn splat_mask64x8(self, val: bool) -> mask64x8; + #[doc = "Create a SIMD mask from signed integer mask lanes."] fn load_array_mask64x8(self, val: [i64; 8usize]) -> mask64x8; - #[doc = "Create a SIMD vector from an array of the same length."] - fn load_array_ref_mask64x8(self, val: &[i64; 8usize]) -> mask64x8; - #[doc = "Convert a SIMD vector to an array."] + #[doc = "Convert a SIMD mask to signed integer mask lanes."] fn as_array_mask64x8(self, a: mask64x8) -> [i64; 8usize]; - #[doc = "Project a reference to a SIMD vector to a reference to the equivalent array."] - fn as_array_ref_mask64x8(self, a: &mask64x8) -> &[i64; 8usize]; - #[doc = "Project a mutable reference to a SIMD vector to a mutable reference to the equivalent array."] - fn as_array_mut_mask64x8(self, a: &mut mask64x8) -> &mut [i64; 8usize]; - #[doc = "Store a SIMD vector into an array of the same length."] - fn store_array_mask64x8(self, a: mask64x8, dest: &mut [i64; 8usize]) -> (); - #[doc = "Reinterpret a vector of bytes as a SIMD vector of a given type, with the equivalent byte length."] - fn cvt_from_bytes_mask64x8(self, a: u8x64) -> mask64x8; - #[doc = "Reinterpret a SIMD vector as a vector of bytes, with the equivalent byte length."] - fn cvt_to_bytes_mask64x8(self, a: mask64x8) -> u8x64; - #[doc = "Concatenate `[self, rhs]` and extract `Self::N` elements starting at index `SHIFT`.\n\n`SHIFT` must be within [0, `Self::N`].\n\nThis can be used to implement a \"shift items\" operation by providing all zeroes as one operand. For a left shift, the right-hand side should be all zeroes. For a right shift by `M` items, the left-hand side should be all zeroes, and the shift amount will be `Self::N - M`.\n\nThis can also be used to rotate items within a vector by providing the same vector as both operands.\n\n```text\n\nslide::<1>([a b c d], [e f g h]) == [b c d e]\n\n```"] - fn slide_mask64x8( - self, - a: mask64x8, - b: mask64x8, - ) -> mask64x8; - #[doc = "Like `slide`, but operates independently on each 128-bit block."] - fn slide_within_blocks_mask64x8( - self, - a: mask64x8, - b: mask64x8, - ) -> mask64x8; #[doc = "Compute the logical AND of two masks."] fn and_mask64x8(self, a: mask64x8, b: mask64x8) -> mask64x8; #[doc = "Compute the logical OR of two masks."] @@ -2921,22 +2631,22 @@ pub trait Simd: fn xor_mask64x8(self, a: mask64x8, b: mask64x8) -> mask64x8; #[doc = "Compute the logical NOT of the mask."] fn not_mask64x8(self, a: mask64x8) -> mask64x8; - #[doc = "Select elements from `b` and `c` based on the mask operand `a`.\n\nThis operation's behavior is unspecified if each lane of a is not the all-zeroes or all-ones bit pattern. See the [`Select`] trait's documentation for more information."] + #[doc = "Select elements from `b` and `c` based on the mask operand `a`.\n\nThis operation's behavior is unspecified if a was constructed from signed integer lanes that are neither all-zeroes (integer value 0) nor all-ones (integer value -1). See the [`Select`] trait's documentation for more information."] fn select_mask64x8( self, a: mask64x8, b: mask64x8, c: mask64x8, ) -> mask64x8; - #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each element is all ones if the corresponding elements are equal, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each logical lane is true if the corresponding elements are equal, and false if not."] fn simd_eq_mask64x8(self, a: mask64x8, b: mask64x8) -> mask64x8; - #[doc = "Returns true if any elements in this mask are true (all ones).\n\nBehavior on mask elements that are not all zeroes or all ones is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent if mask elements are not all zeroes or all ones. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for mask elements that are not all zeroes or all ones. That behavior may not match the behavior of this operation."] + #[doc = "Returns true if any logical lanes in this mask are true.\n\nMasks may be converted to and from signed integer lane arrays for compatibility with older APIs. For those conversions, false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1).\n\nBehavior on masks constructed from any other integer bit pattern is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent for such non-canonical masks. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for non-canonical masks. That behavior may not match the behavior of this operation."] fn any_true_mask64x8(self, a: mask64x8) -> bool; - #[doc = "Returns true if all elements in this mask are true (all ones).\n\nBehavior on mask elements that are not all zeroes or all ones is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent if mask elements are not all zeroes or all ones. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for mask elements that are not all zeroes or all ones. That behavior may not match the behavior of this operation."] + #[doc = "Returns true if all logical lanes in this mask are true.\n\nMasks may be converted to and from signed integer lane arrays for compatibility with older APIs. For those conversions, false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1).\n\nBehavior on masks constructed from any other integer bit pattern is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent for such non-canonical masks. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for non-canonical masks. That behavior may not match the behavior of this operation."] fn all_true_mask64x8(self, a: mask64x8) -> bool; - #[doc = "Returns true if any elements in this mask are false (all zeroes).\n\nThis is logically equivalent to `!all_true`, but may be faster.\n\nBehavior on mask elements that are not all zeroes or all ones is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent if mask elements are not all zeroes or all ones. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for mask elements that are not all zeroes or all ones. That behavior may not match the behavior of this operation."] + #[doc = "Returns true if any logical lanes in this mask are false.\n\nThis is logically equivalent to `!all_true`, but may be faster.\n\nMasks may be converted to and from signed integer lane arrays for compatibility with older APIs. For those conversions, false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1).\n\nBehavior on masks constructed from any other integer bit pattern is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent for such non-canonical masks. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for non-canonical masks. That behavior may not match the behavior of this operation."] fn any_false_mask64x8(self, a: mask64x8) -> bool; - #[doc = "Returns true if all elements in this mask are false (all zeroes).\n\nThis is logically equivalent to `!any_true`, but may be faster.\n\nBehavior on mask elements that are not all zeroes or all ones is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent if mask elements are not all zeroes or all ones. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for mask elements that are not all zeroes or all ones. That behavior may not match the behavior of this operation."] + #[doc = "Returns true if all logical lanes in this mask are false.\n\nThis is logically equivalent to `!any_true`, but may be faster.\n\nMasks may be converted to and from signed integer lane arrays for compatibility with older APIs. For those conversions, false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1).\n\nBehavior on masks constructed from any other integer bit pattern is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent for such non-canonical masks. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for non-canonical masks. That behavior may not match the behavior of this operation."] fn all_false_mask64x8(self, a: mask64x8) -> bool; #[doc = "Split a vector into two vectors of half the width.\n\nReturns a tuple of (lower half, upper half)."] fn split_mask64x8(self, a: mask64x8) -> (mask64x4, mask64x4); @@ -3005,14 +2715,11 @@ pub trait SimdBase: #[doc = r" working with a native-width vector (e.g. [`Simd::f32s`]) and"] #[doc = r" want to process data in native-width chunks."] const N: usize; - #[doc = r" A SIMD vector mask with the same number of elements."] + #[doc = r" A SIMD vector mask with the same number of logical lanes."] #[doc = r""] - #[doc = r" The mask element is represented as an integer which is"] - #[doc = r" all-0 for `false` and all-1 for `true`. When we get deep"] - #[doc = r" into AVX-512, we need to think about predication masks."] - #[doc = r""] - #[doc = r" One possibility to consider is that the SIMD trait grows"] - #[doc = r" `maskAxB` associated types."] + #[doc = r" Masks intentionally do not implement [`SimdBase`]. SSE, NEON, WASM, and the"] + #[doc = r" fallback backend currently store masks as all-zero/all-one integer vectors, but"] + #[doc = r" AVX-512/RVV/SVE-style targets use compact predicate registers instead."] type Mask: SimdMask::Mask>; #[doc = r" A 128-bit SIMD vector of the same scalar type."] type Block: SimdBase; @@ -3096,15 +2803,15 @@ pub trait SimdFloat: fn sqrt(self) -> Self; #[doc = "Return a vector with the magnitude of `self` and the sign of `rhs` for each element.\n\nThis operation copies the sign bit, so if an input element is NaN, the output element will be a NaN with the same payload and a copied sign bit."] fn copysign(self, rhs: impl SimdInto) -> Self; - #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each element is all ones if the corresponding elements are equal, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each logical lane is true if the corresponding elements are equal, and false if not."] fn simd_eq(self, rhs: impl SimdInto) -> Self::Mask; - #[doc = "Compare two vectors element-wise for less than.\n\nReturns a mask where each element is all ones if `self` is less than `rhs`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for less than.\n\nReturns a mask where each logical lane is true if `self` is less than `rhs`, and false if not."] fn simd_lt(self, rhs: impl SimdInto) -> Self::Mask; - #[doc = "Compare two vectors element-wise for less than or equal.\n\nReturns a mask where each element is all ones if `self` is less than or equal to `rhs`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for less than or equal.\n\nReturns a mask where each logical lane is true if `self` is less than or equal to `rhs`, and false if not."] fn simd_le(self, rhs: impl SimdInto) -> Self::Mask; - #[doc = "Compare two vectors element-wise for greater than or equal.\n\nReturns a mask where each element is all ones if `self` is greater than or equal to `rhs`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for greater than or equal.\n\nReturns a mask where each logical lane is true if `self` is greater than or equal to `rhs`, and false if not."] fn simd_ge(self, rhs: impl SimdInto) -> Self::Mask; - #[doc = "Compare two vectors element-wise for greater than.\n\nReturns a mask where each element is all ones if `self` is greater than `rhs`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for greater than.\n\nReturns a mask where each logical lane is true if `self` is greater than `rhs`, and false if not."] fn simd_gt(self, rhs: impl SimdInto) -> Self::Mask; #[doc = "Interleave the lower half elements of two vectors.\n\nFor vectors `[a0, a1, a2, a3]` and `[b0, b1, b2, b3]`, returns `[a0, b0, a1, b1]`.\n\n**Note:** This operation is only useful if you need to discard elements `a2, a3, b2, b3`.\n For fully interleaving two vectors prefer `interleave`,\n which is faster than `zip_low` followed by `zip_high` on some platforms."] fn zip_low(self, rhs: impl SimdInto) -> Self; @@ -3186,15 +2893,15 @@ pub trait SimdInt: fn to_float>(self) -> T { T::float_from(self) } - #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each element is all ones if the corresponding elements are equal, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each logical lane is true if the corresponding elements are equal, and false if not."] fn simd_eq(self, rhs: impl SimdInto) -> Self::Mask; - #[doc = "Compare two vectors element-wise for less than.\n\nReturns a mask where each element is all ones if `self` is less than `rhs`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for less than.\n\nReturns a mask where each logical lane is true if `self` is less than `rhs`, and false if not."] fn simd_lt(self, rhs: impl SimdInto) -> Self::Mask; - #[doc = "Compare two vectors element-wise for less than or equal.\n\nReturns a mask where each element is all ones if `self` is less than or equal to `rhs`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for less than or equal.\n\nReturns a mask where each logical lane is true if `self` is less than or equal to `rhs`, and false if not."] fn simd_le(self, rhs: impl SimdInto) -> Self::Mask; - #[doc = "Compare two vectors element-wise for greater than or equal.\n\nReturns a mask where each element is all ones if `self` is greater than or equal to `rhs`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for greater than or equal.\n\nReturns a mask where each logical lane is true if `self` is greater than or equal to `rhs`, and false if not."] fn simd_ge(self, rhs: impl SimdInto) -> Self::Mask; - #[doc = "Compare two vectors element-wise for greater than.\n\nReturns a mask where each element is all ones if `self` is greater than `rhs`, and all zeroes if not."] + #[doc = "Compare two vectors element-wise for greater than.\n\nReturns a mask where each logical lane is true if `self` is greater than `rhs`, and false if not."] fn simd_gt(self, rhs: impl SimdInto) -> Self::Mask; #[doc = "Interleave the lower half elements of two vectors.\n\nFor vectors `[a0, a1, a2, a3]` and `[b0, b1, b2, b3]`, returns `[a0, b0, a1, b1]`.\n\n**Note:** This operation is only useful if you need to discard elements `a2, a3, b2, b3`.\n For fully interleaving two vectors prefer `interleave`,\n which is faster than `zip_low` followed by `zip_high` on some platforms."] fn zip_low(self, rhs: impl SimdInto) -> Self; @@ -3214,31 +2921,52 @@ pub trait SimdInt: fn max(self, rhs: impl SimdInto) -> Self; } #[doc = r" Functionality implemented by SIMD masks."] +#[doc = r""] +#[doc = r" A mask has one logical boolean lane per SIMD lane. Its storage is intentionally opaque:"] +#[doc = r" current backends may use all-zero/all-one integer vectors internally, while future"] +#[doc = r" predicate-register backends may use a compact representation."] pub trait SimdMask: - SimdBase + Copy + + Sync + + Send + + 'static + Seal + + Select + core::ops::BitAnd + core::ops::BitAndAssign - + core::ops::BitAnd - + core::ops::BitAndAssign + core::ops::BitOr + core::ops::BitOrAssign - + core::ops::BitOr - + core::ops::BitOrAssign + core::ops::BitXor + core::ops::BitXorAssign - + core::ops::BitXor - + core::ops::BitXorAssign + core::ops::Not { - #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each element is all ones if the corresponding elements are equal, and all zeroes if not."] - fn simd_eq(self, rhs: impl SimdInto) -> Self::Mask; - #[doc = "Returns true if any elements in this mask are true (all ones).\n\nBehavior on mask elements that are not all zeroes or all ones is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent if mask elements are not all zeroes or all ones. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for mask elements that are not all zeroes or all ones. That behavior may not match the behavior of this operation."] + #[doc = r" The signed integer type used when converting this mask to and from lane values."] + #[doc = r""] + #[doc = r" False lanes are encoded as all zeroes (integer value 0), and true lanes are encoded as all ones"] + #[doc = r" (integer value -1)."] + type Element: SimdElement; + #[doc = r" This mask type's lane count."] + const N: usize; + #[doc = r" Get the [`Simd`] implementation associated with this type."] + fn witness(&self) -> S; + #[doc = r" Create a SIMD mask with all lanes set to the given boolean value."] + fn splat(simd: S, val: bool) -> Self; + #[doc = r" Create a SIMD mask from signed integer mask lanes."] + #[doc = r""] + #[doc = r" The slice must be exactly the size of the SIMD mask."] + fn from_slice(simd: S, slice: &[Self::Element]) -> Self; + #[doc = r" Store this SIMD mask as signed integer mask lanes."] + #[doc = r""] + #[doc = r" The slice must be exactly the size of the SIMD mask."] + fn store_slice(&self, slice: &mut [Self::Element]); + #[doc = "Compare two vectors element-wise for equality.\n\nReturns a mask where each logical lane is true if the corresponding elements are equal, and false if not."] + fn simd_eq(self, rhs: impl SimdInto) -> Self; + #[doc = "Returns true if any logical lanes in this mask are true.\n\nMasks may be converted to and from signed integer lane arrays for compatibility with older APIs. For those conversions, false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1).\n\nBehavior on masks constructed from any other integer bit pattern is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent for such non-canonical masks. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for non-canonical masks. That behavior may not match the behavior of this operation."] fn any_true(self) -> bool; - #[doc = "Returns true if all elements in this mask are true (all ones).\n\nBehavior on mask elements that are not all zeroes or all ones is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent if mask elements are not all zeroes or all ones. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for mask elements that are not all zeroes or all ones. That behavior may not match the behavior of this operation."] + #[doc = "Returns true if all logical lanes in this mask are true.\n\nMasks may be converted to and from signed integer lane arrays for compatibility with older APIs. For those conversions, false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1).\n\nBehavior on masks constructed from any other integer bit pattern is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent for such non-canonical masks. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for non-canonical masks. That behavior may not match the behavior of this operation."] fn all_true(self) -> bool; - #[doc = "Returns true if any elements in this mask are false (all zeroes).\n\nThis is logically equivalent to `!all_true`, but may be faster.\n\nBehavior on mask elements that are not all zeroes or all ones is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent if mask elements are not all zeroes or all ones. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for mask elements that are not all zeroes or all ones. That behavior may not match the behavior of this operation."] + #[doc = "Returns true if any logical lanes in this mask are false.\n\nThis is logically equivalent to `!all_true`, but may be faster.\n\nMasks may be converted to and from signed integer lane arrays for compatibility with older APIs. For those conversions, false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1).\n\nBehavior on masks constructed from any other integer bit pattern is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent for such non-canonical masks. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for non-canonical masks. That behavior may not match the behavior of this operation."] fn any_false(self) -> bool; - #[doc = "Returns true if all elements in this mask are false (all zeroes).\n\nThis is logically equivalent to `!any_true`, but may be faster.\n\nBehavior on mask elements that are not all zeroes or all ones is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent if mask elements are not all zeroes or all ones. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for mask elements that are not all zeroes or all ones. That behavior may not match the behavior of this operation."] + #[doc = "Returns true if all logical lanes in this mask are false.\n\nThis is logically equivalent to `!any_true`, but may be faster.\n\nMasks may be converted to and from signed integer lane arrays for compatibility with older APIs. For those conversions, false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1).\n\nBehavior on masks constructed from any other integer bit pattern is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\nThe behavior is also not guaranteed to be logically consistent for such non-canonical masks. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\nThe [`select`](crate::Select::select) operation also has unspecified behavior for non-canonical masks. That behavior may not match the behavior of this operation."] fn all_false(self) -> bool; } diff --git a/fearless_simd/src/generated/simd_types.rs b/fearless_simd/src/generated/simd_types.rs index f81e3a18..ec0e074f 100644 --- a/fearless_simd/src/generated/simd_types.rs +++ b/fearless_simd/src/generated/simd_types.rs @@ -625,12 +625,11 @@ impl crate::SimdCombine for u8x16 { self.simd.combine_u8x16(self, rhs.simd_into(self.simd)) } } -#[doc = "A SIMD mask of 16 8-bit elements.\n\nWhen created from a comparison operation, and as it should be used in a [`Self::select`] operation, each element will be all ones if it's \"true\", and all zeroes if it's \"false\"."] +#[doc = "A SIMD mask of 16 logical lanes corresponding to 8-bit vector elements.\n\nThe storage representation of this type is intentionally opaque. For compatibility with existing APIs, it may be converted to and from signed integer lanes where false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1)."] #[derive(Clone, Copy)] -#[repr(C, align(16))] pub struct mask8x16 { pub(crate) val: S::mask8x16, - pub simd: S, + pub(crate) simd: S, } impl Seal for mask8x16 {} impl SimdFrom<[i8; 16], S> for mask8x16 { @@ -645,118 +644,47 @@ impl From> for [i8; 16] { value.simd.as_array_mask8x16(value) } } -impl core::ops::Deref for mask8x16 { - type Target = [i8; 16]; - #[inline(always)] - fn deref(&self) -> &Self::Target { - self.simd.as_array_ref_mask8x16(self) - } -} -impl core::ops::DerefMut for mask8x16 { - #[inline(always)] - fn deref_mut(&mut self) -> &mut Self::Target { - self.simd.as_array_mut_mask8x16(self) - } -} impl core::fmt::Debug for mask8x16 { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - crate::support::simd_debug_impl( - f, - "mask8x16", - &self.simd, - self.simd.as_array_ref_mask8x16(self), - ) + let lanes = self.simd.as_array_mask8x16(*self); + crate::support::simd_debug_impl(f, "mask8x16", &self.simd, &lanes) } } -impl SimdFrom for mask8x16 { +impl SimdFrom for mask8x16 { #[inline(always)] - fn simd_from(simd: S, value: i8) -> Self { + fn simd_from(simd: S, value: bool) -> Self { simd.splat_mask8x16(value) } } -impl core::ops::Index for mask8x16 { - type Output = i8; - #[inline(always)] - fn index(&self, i: usize) -> &Self::Output { - &self.simd.as_array_ref_mask8x16(self)[i] - } -} -impl core::ops::IndexMut for mask8x16 { - #[inline(always)] - fn index_mut(&mut self, i: usize) -> &mut Self::Output { - &mut self.simd.as_array_mut_mask8x16(self)[i] - } -} impl Select> for mask8x16 { #[inline(always)] fn select(self, if_true: mask8x16, if_false: mask8x16) -> mask8x16 { self.simd.select_mask8x16(self, if_true, if_false) } } -impl Bytes for mask8x16 { - type Bytes = u8x16; - #[inline(always)] - fn to_bytes(self) -> Self::Bytes { - self.simd.cvt_to_bytes_mask8x16(self) - } - #[inline(always)] - fn from_bytes(value: Self::Bytes) -> Self { - value.simd.cvt_from_bytes_mask8x16(value) - } -} -impl SimdBase for mask8x16 { +impl crate::SimdMask for mask8x16 { type Element = i8; const N: usize = 16; - type Mask = mask8x16; - type Block = mask8x16; - type Array = [i8; 16]; #[inline(always)] fn witness(&self) -> S { self.simd } #[inline(always)] - fn as_slice(&self) -> &[i8] { - self.simd.as_array_ref_mask8x16(self).as_slice() - } - #[inline(always)] - fn as_mut_slice(&mut self) -> &mut [i8] { - self.simd.as_array_mut_mask8x16(self).as_mut_slice() + fn splat(simd: S, val: bool) -> Self { + simd.splat_mask8x16(val) } #[inline(always)] fn from_slice(simd: S, slice: &[i8]) -> Self { - simd.load_array_ref_mask8x16(slice.try_into().unwrap()) + let slice: &[i8; 16] = slice.try_into().unwrap(); + simd.load_array_mask8x16(*slice) } #[inline(always)] fn store_slice(&self, slice: &mut [i8]) { - self.simd - .store_array_mask8x16(*self, slice.try_into().unwrap()); - } - #[inline(always)] - fn splat(simd: S, val: i8) -> Self { - simd.splat_mask8x16(val) - } - #[inline(always)] - fn block_splat(block: Self::Block) -> Self { - block - } - #[inline(always)] - fn from_fn(simd: S, f: impl FnMut(usize) -> i8) -> Self { - simd.load_array_mask8x16(core::array::from_fn(f)) - } - #[inline(always)] - fn slide(self, rhs: impl SimdInto) -> Self { - self.simd - .slide_mask8x16::(self, rhs.simd_into(self.simd)) - } - #[inline(always)] - fn slide_within_blocks(self, rhs: impl SimdInto) -> Self { - self.simd - .slide_within_blocks_mask8x16::(self, rhs.simd_into(self.simd)) + let slice: &mut [i8; 16] = slice.try_into().unwrap(); + *slice = self.simd.as_array_mask8x16(*self); } -} -impl crate::SimdMask for mask8x16 { #[inline(always)] - fn simd_eq(self, rhs: impl SimdInto) -> Self::Mask { + fn simd_eq(self, rhs: impl SimdInto) -> Self { self.simd.simd_eq_mask8x16(self, rhs.simd_into(self.simd)) } #[inline(always)] @@ -776,13 +704,6 @@ impl crate::SimdMask for mask8x16 { self.simd.all_false_mask8x16(self) } } -impl crate::SimdCombine for mask8x16 { - type Combined = mask8x32; - #[inline(always)] - fn combine(self, rhs: impl SimdInto) -> Self::Combined { - self.simd.combine_mask8x16(self, rhs.simd_into(self.simd)) - } -} #[doc = "A SIMD vector of 8 [`i16`] elements.\n\nYou may construct this vector type using the [`Self::splat`], [`Self::from_slice`], [`Self::simd_from`], [`Self::from_fn`], and [`Self::block_splat`] methods.\n\n```rust\n# use fearless_simd::{prelude::*, i16x8};\nfn construct_simd(simd: S) {\n // From a single scalar value:\n let a = i16x8::splat(simd, 1);\n let b = i16x8::simd_from(simd, 1);\n\n // From a slice:\n let c = i16x8::from_slice(simd, &[1, 2, 3, 4, 5, 6, 7, 8]);\n\n // From an array:\n let d = i16x8::simd_from(simd, [1, 2, 3, 4, 5, 6, 7, 8]);\n\n // From an element-wise function:\n let e = i16x8::from_fn(simd, |i| i as i16);\n}\n```"] #[derive(Clone, Copy)] #[repr(C, align(16))] @@ -1153,12 +1074,11 @@ impl crate::SimdCombine for u16x8 { self.simd.combine_u16x8(self, rhs.simd_into(self.simd)) } } -#[doc = "A SIMD mask of 8 16-bit elements.\n\nWhen created from a comparison operation, and as it should be used in a [`Self::select`] operation, each element will be all ones if it's \"true\", and all zeroes if it's \"false\"."] +#[doc = "A SIMD mask of 8 logical lanes corresponding to 16-bit vector elements.\n\nThe storage representation of this type is intentionally opaque. For compatibility with existing APIs, it may be converted to and from signed integer lanes where false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1)."] #[derive(Clone, Copy)] -#[repr(C, align(16))] pub struct mask16x8 { pub(crate) val: S::mask16x8, - pub simd: S, + pub(crate) simd: S, } impl Seal for mask16x8 {} impl SimdFrom<[i16; 8], S> for mask16x8 { @@ -1173,118 +1093,47 @@ impl From> for [i16; 8] { value.simd.as_array_mask16x8(value) } } -impl core::ops::Deref for mask16x8 { - type Target = [i16; 8]; - #[inline(always)] - fn deref(&self) -> &Self::Target { - self.simd.as_array_ref_mask16x8(self) - } -} -impl core::ops::DerefMut for mask16x8 { - #[inline(always)] - fn deref_mut(&mut self) -> &mut Self::Target { - self.simd.as_array_mut_mask16x8(self) - } -} impl core::fmt::Debug for mask16x8 { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - crate::support::simd_debug_impl( - f, - "mask16x8", - &self.simd, - self.simd.as_array_ref_mask16x8(self), - ) + let lanes = self.simd.as_array_mask16x8(*self); + crate::support::simd_debug_impl(f, "mask16x8", &self.simd, &lanes) } } -impl SimdFrom for mask16x8 { +impl SimdFrom for mask16x8 { #[inline(always)] - fn simd_from(simd: S, value: i16) -> Self { + fn simd_from(simd: S, value: bool) -> Self { simd.splat_mask16x8(value) } } -impl core::ops::Index for mask16x8 { - type Output = i16; - #[inline(always)] - fn index(&self, i: usize) -> &Self::Output { - &self.simd.as_array_ref_mask16x8(self)[i] - } -} -impl core::ops::IndexMut for mask16x8 { - #[inline(always)] - fn index_mut(&mut self, i: usize) -> &mut Self::Output { - &mut self.simd.as_array_mut_mask16x8(self)[i] - } -} impl Select> for mask16x8 { #[inline(always)] fn select(self, if_true: mask16x8, if_false: mask16x8) -> mask16x8 { self.simd.select_mask16x8(self, if_true, if_false) } } -impl Bytes for mask16x8 { - type Bytes = u8x16; - #[inline(always)] - fn to_bytes(self) -> Self::Bytes { - self.simd.cvt_to_bytes_mask16x8(self) - } - #[inline(always)] - fn from_bytes(value: Self::Bytes) -> Self { - value.simd.cvt_from_bytes_mask16x8(value) - } -} -impl SimdBase for mask16x8 { +impl crate::SimdMask for mask16x8 { type Element = i16; const N: usize = 8; - type Mask = mask16x8; - type Block = mask16x8; - type Array = [i16; 8]; #[inline(always)] fn witness(&self) -> S { self.simd } #[inline(always)] - fn as_slice(&self) -> &[i16] { - self.simd.as_array_ref_mask16x8(self).as_slice() - } - #[inline(always)] - fn as_mut_slice(&mut self) -> &mut [i16] { - self.simd.as_array_mut_mask16x8(self).as_mut_slice() + fn splat(simd: S, val: bool) -> Self { + simd.splat_mask16x8(val) } #[inline(always)] fn from_slice(simd: S, slice: &[i16]) -> Self { - simd.load_array_ref_mask16x8(slice.try_into().unwrap()) + let slice: &[i16; 8] = slice.try_into().unwrap(); + simd.load_array_mask16x8(*slice) } #[inline(always)] fn store_slice(&self, slice: &mut [i16]) { - self.simd - .store_array_mask16x8(*self, slice.try_into().unwrap()); + let slice: &mut [i16; 8] = slice.try_into().unwrap(); + *slice = self.simd.as_array_mask16x8(*self); } #[inline(always)] - fn splat(simd: S, val: i16) -> Self { - simd.splat_mask16x8(val) - } - #[inline(always)] - fn block_splat(block: Self::Block) -> Self { - block - } - #[inline(always)] - fn from_fn(simd: S, f: impl FnMut(usize) -> i16) -> Self { - simd.load_array_mask16x8(core::array::from_fn(f)) - } - #[inline(always)] - fn slide(self, rhs: impl SimdInto) -> Self { - self.simd - .slide_mask16x8::(self, rhs.simd_into(self.simd)) - } - #[inline(always)] - fn slide_within_blocks(self, rhs: impl SimdInto) -> Self { - self.simd - .slide_within_blocks_mask16x8::(self, rhs.simd_into(self.simd)) - } -} -impl crate::SimdMask for mask16x8 { - #[inline(always)] - fn simd_eq(self, rhs: impl SimdInto) -> Self::Mask { + fn simd_eq(self, rhs: impl SimdInto) -> Self { self.simd.simd_eq_mask16x8(self, rhs.simd_into(self.simd)) } #[inline(always)] @@ -1304,13 +1153,6 @@ impl crate::SimdMask for mask16x8 { self.simd.all_false_mask16x8(self) } } -impl crate::SimdCombine for mask16x8 { - type Combined = mask16x16; - #[inline(always)] - fn combine(self, rhs: impl SimdInto) -> Self::Combined { - self.simd.combine_mask16x8(self, rhs.simd_into(self.simd)) - } -} #[doc = "A SIMD vector of 4 [`i32`] elements.\n\nYou may construct this vector type using the [`Self::splat`], [`Self::from_slice`], [`Self::simd_from`], [`Self::from_fn`], and [`Self::block_splat`] methods.\n\n```rust\n# use fearless_simd::{prelude::*, i32x4};\nfn construct_simd(simd: S) {\n // From a single scalar value:\n let a = i32x4::splat(simd, 1);\n let b = i32x4::simd_from(simd, 1);\n\n // From a slice:\n let c = i32x4::from_slice(simd, &[1, 2, 3, 4]);\n\n // From an array:\n let d = i32x4::simd_from(simd, [1, 2, 3, 4]);\n\n // From an element-wise function:\n let e = i32x4::from_fn(simd, |i| i as i32);\n}\n```"] #[derive(Clone, Copy)] #[repr(C, align(16))] @@ -1705,12 +1547,11 @@ impl crate::SimdCombine for u32x4 { self.simd.combine_u32x4(self, rhs.simd_into(self.simd)) } } -#[doc = "A SIMD mask of 4 32-bit elements.\n\nWhen created from a comparison operation, and as it should be used in a [`Self::select`] operation, each element will be all ones if it's \"true\", and all zeroes if it's \"false\"."] +#[doc = "A SIMD mask of 4 logical lanes corresponding to 32-bit vector elements.\n\nThe storage representation of this type is intentionally opaque. For compatibility with existing APIs, it may be converted to and from signed integer lanes where false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1)."] #[derive(Clone, Copy)] -#[repr(C, align(16))] pub struct mask32x4 { pub(crate) val: S::mask32x4, - pub simd: S, + pub(crate) simd: S, } impl Seal for mask32x4 {} impl SimdFrom<[i32; 4], S> for mask32x4 { @@ -1725,118 +1566,47 @@ impl From> for [i32; 4] { value.simd.as_array_mask32x4(value) } } -impl core::ops::Deref for mask32x4 { - type Target = [i32; 4]; - #[inline(always)] - fn deref(&self) -> &Self::Target { - self.simd.as_array_ref_mask32x4(self) - } -} -impl core::ops::DerefMut for mask32x4 { - #[inline(always)] - fn deref_mut(&mut self) -> &mut Self::Target { - self.simd.as_array_mut_mask32x4(self) - } -} impl core::fmt::Debug for mask32x4 { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - crate::support::simd_debug_impl( - f, - "mask32x4", - &self.simd, - self.simd.as_array_ref_mask32x4(self), - ) + let lanes = self.simd.as_array_mask32x4(*self); + crate::support::simd_debug_impl(f, "mask32x4", &self.simd, &lanes) } } -impl SimdFrom for mask32x4 { +impl SimdFrom for mask32x4 { #[inline(always)] - fn simd_from(simd: S, value: i32) -> Self { + fn simd_from(simd: S, value: bool) -> Self { simd.splat_mask32x4(value) } } -impl core::ops::Index for mask32x4 { - type Output = i32; - #[inline(always)] - fn index(&self, i: usize) -> &Self::Output { - &self.simd.as_array_ref_mask32x4(self)[i] - } -} -impl core::ops::IndexMut for mask32x4 { - #[inline(always)] - fn index_mut(&mut self, i: usize) -> &mut Self::Output { - &mut self.simd.as_array_mut_mask32x4(self)[i] - } -} impl Select> for mask32x4 { #[inline(always)] fn select(self, if_true: mask32x4, if_false: mask32x4) -> mask32x4 { self.simd.select_mask32x4(self, if_true, if_false) } } -impl Bytes for mask32x4 { - type Bytes = u8x16; - #[inline(always)] - fn to_bytes(self) -> Self::Bytes { - self.simd.cvt_to_bytes_mask32x4(self) - } - #[inline(always)] - fn from_bytes(value: Self::Bytes) -> Self { - value.simd.cvt_from_bytes_mask32x4(value) - } -} -impl SimdBase for mask32x4 { +impl crate::SimdMask for mask32x4 { type Element = i32; const N: usize = 4; - type Mask = mask32x4; - type Block = mask32x4; - type Array = [i32; 4]; #[inline(always)] fn witness(&self) -> S { self.simd } #[inline(always)] - fn as_slice(&self) -> &[i32] { - self.simd.as_array_ref_mask32x4(self).as_slice() - } - #[inline(always)] - fn as_mut_slice(&mut self) -> &mut [i32] { - self.simd.as_array_mut_mask32x4(self).as_mut_slice() + fn splat(simd: S, val: bool) -> Self { + simd.splat_mask32x4(val) } #[inline(always)] fn from_slice(simd: S, slice: &[i32]) -> Self { - simd.load_array_ref_mask32x4(slice.try_into().unwrap()) + let slice: &[i32; 4] = slice.try_into().unwrap(); + simd.load_array_mask32x4(*slice) } #[inline(always)] fn store_slice(&self, slice: &mut [i32]) { - self.simd - .store_array_mask32x4(*self, slice.try_into().unwrap()); - } - #[inline(always)] - fn splat(simd: S, val: i32) -> Self { - simd.splat_mask32x4(val) - } - #[inline(always)] - fn block_splat(block: Self::Block) -> Self { - block - } - #[inline(always)] - fn from_fn(simd: S, f: impl FnMut(usize) -> i32) -> Self { - simd.load_array_mask32x4(core::array::from_fn(f)) + let slice: &mut [i32; 4] = slice.try_into().unwrap(); + *slice = self.simd.as_array_mask32x4(*self); } #[inline(always)] - fn slide(self, rhs: impl SimdInto) -> Self { - self.simd - .slide_mask32x4::(self, rhs.simd_into(self.simd)) - } - #[inline(always)] - fn slide_within_blocks(self, rhs: impl SimdInto) -> Self { - self.simd - .slide_within_blocks_mask32x4::(self, rhs.simd_into(self.simd)) - } -} -impl crate::SimdMask for mask32x4 { - #[inline(always)] - fn simd_eq(self, rhs: impl SimdInto) -> Self::Mask { + fn simd_eq(self, rhs: impl SimdInto) -> Self { self.simd.simd_eq_mask32x4(self, rhs.simd_into(self.simd)) } #[inline(always)] @@ -1856,13 +1626,6 @@ impl crate::SimdMask for mask32x4 { self.simd.all_false_mask32x4(self) } } -impl crate::SimdCombine for mask32x4 { - type Combined = mask32x8; - #[inline(always)] - fn combine(self, rhs: impl SimdInto) -> Self::Combined { - self.simd.combine_mask32x4(self, rhs.simd_into(self.simd)) - } -} #[doc = "A SIMD vector of 2 [`f64`] elements.\n\nYou may construct this vector type using the [`Self::splat`], [`Self::from_slice`], [`Self::simd_from`], [`Self::from_fn`], and [`Self::block_splat`] methods.\n\n```rust\n# use fearless_simd::{prelude::*, f64x2};\nfn construct_simd(simd: S) {\n // From a single scalar value:\n let a = f64x2::splat(simd, 1.0);\n let b = f64x2::simd_from(simd, 1.0);\n\n // From a slice:\n let c = f64x2::from_slice(simd, &[1.0, 2.0]);\n\n // From an array:\n let d = f64x2::simd_from(simd, [1.0, 2.0]);\n\n // From an element-wise function:\n let e = f64x2::from_fn(simd, |i| i as f64);\n}\n```"] #[derive(Clone, Copy)] #[repr(C, align(16))] @@ -2098,12 +1861,11 @@ impl crate::SimdCombine for f64x2 { self.simd.combine_f64x2(self, rhs.simd_into(self.simd)) } } -#[doc = "A SIMD mask of 2 64-bit elements.\n\nWhen created from a comparison operation, and as it should be used in a [`Self::select`] operation, each element will be all ones if it's \"true\", and all zeroes if it's \"false\"."] +#[doc = "A SIMD mask of 2 logical lanes corresponding to 64-bit vector elements.\n\nThe storage representation of this type is intentionally opaque. For compatibility with existing APIs, it may be converted to and from signed integer lanes where false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1)."] #[derive(Clone, Copy)] -#[repr(C, align(16))] pub struct mask64x2 { pub(crate) val: S::mask64x2, - pub simd: S, + pub(crate) simd: S, } impl Seal for mask64x2 {} impl SimdFrom<[i64; 2], S> for mask64x2 { @@ -2118,118 +1880,47 @@ impl From> for [i64; 2] { value.simd.as_array_mask64x2(value) } } -impl core::ops::Deref for mask64x2 { - type Target = [i64; 2]; - #[inline(always)] - fn deref(&self) -> &Self::Target { - self.simd.as_array_ref_mask64x2(self) - } -} -impl core::ops::DerefMut for mask64x2 { - #[inline(always)] - fn deref_mut(&mut self) -> &mut Self::Target { - self.simd.as_array_mut_mask64x2(self) - } -} impl core::fmt::Debug for mask64x2 { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - crate::support::simd_debug_impl( - f, - "mask64x2", - &self.simd, - self.simd.as_array_ref_mask64x2(self), - ) + let lanes = self.simd.as_array_mask64x2(*self); + crate::support::simd_debug_impl(f, "mask64x2", &self.simd, &lanes) } } -impl SimdFrom for mask64x2 { +impl SimdFrom for mask64x2 { #[inline(always)] - fn simd_from(simd: S, value: i64) -> Self { + fn simd_from(simd: S, value: bool) -> Self { simd.splat_mask64x2(value) } } -impl core::ops::Index for mask64x2 { - type Output = i64; - #[inline(always)] - fn index(&self, i: usize) -> &Self::Output { - &self.simd.as_array_ref_mask64x2(self)[i] - } -} -impl core::ops::IndexMut for mask64x2 { - #[inline(always)] - fn index_mut(&mut self, i: usize) -> &mut Self::Output { - &mut self.simd.as_array_mut_mask64x2(self)[i] - } -} impl Select> for mask64x2 { #[inline(always)] fn select(self, if_true: mask64x2, if_false: mask64x2) -> mask64x2 { self.simd.select_mask64x2(self, if_true, if_false) } } -impl Bytes for mask64x2 { - type Bytes = u8x16; - #[inline(always)] - fn to_bytes(self) -> Self::Bytes { - self.simd.cvt_to_bytes_mask64x2(self) - } - #[inline(always)] - fn from_bytes(value: Self::Bytes) -> Self { - value.simd.cvt_from_bytes_mask64x2(value) - } -} -impl SimdBase for mask64x2 { +impl crate::SimdMask for mask64x2 { type Element = i64; const N: usize = 2; - type Mask = mask64x2; - type Block = mask64x2; - type Array = [i64; 2]; #[inline(always)] fn witness(&self) -> S { self.simd } #[inline(always)] - fn as_slice(&self) -> &[i64] { - self.simd.as_array_ref_mask64x2(self).as_slice() - } - #[inline(always)] - fn as_mut_slice(&mut self) -> &mut [i64] { - self.simd.as_array_mut_mask64x2(self).as_mut_slice() + fn splat(simd: S, val: bool) -> Self { + simd.splat_mask64x2(val) } #[inline(always)] fn from_slice(simd: S, slice: &[i64]) -> Self { - simd.load_array_ref_mask64x2(slice.try_into().unwrap()) + let slice: &[i64; 2] = slice.try_into().unwrap(); + simd.load_array_mask64x2(*slice) } #[inline(always)] fn store_slice(&self, slice: &mut [i64]) { - self.simd - .store_array_mask64x2(*self, slice.try_into().unwrap()); - } - #[inline(always)] - fn splat(simd: S, val: i64) -> Self { - simd.splat_mask64x2(val) - } - #[inline(always)] - fn block_splat(block: Self::Block) -> Self { - block - } - #[inline(always)] - fn from_fn(simd: S, f: impl FnMut(usize) -> i64) -> Self { - simd.load_array_mask64x2(core::array::from_fn(f)) + let slice: &mut [i64; 2] = slice.try_into().unwrap(); + *slice = self.simd.as_array_mask64x2(*self); } #[inline(always)] - fn slide(self, rhs: impl SimdInto) -> Self { - self.simd - .slide_mask64x2::(self, rhs.simd_into(self.simd)) - } - #[inline(always)] - fn slide_within_blocks(self, rhs: impl SimdInto) -> Self { - self.simd - .slide_within_blocks_mask64x2::(self, rhs.simd_into(self.simd)) - } -} -impl crate::SimdMask for mask64x2 { - #[inline(always)] - fn simd_eq(self, rhs: impl SimdInto) -> Self::Mask { + fn simd_eq(self, rhs: impl SimdInto) -> Self { self.simd.simd_eq_mask64x2(self, rhs.simd_into(self.simd)) } #[inline(always)] @@ -2249,13 +1940,6 @@ impl crate::SimdMask for mask64x2 { self.simd.all_false_mask64x2(self) } } -impl crate::SimdCombine for mask64x2 { - type Combined = mask64x4; - #[inline(always)] - fn combine(self, rhs: impl SimdInto) -> Self::Combined { - self.simd.combine_mask64x2(self, rhs.simd_into(self.simd)) - } -} #[doc = "A SIMD vector of 8 [`f32`] elements.\n\nYou may construct this vector type using the [`Self::splat`], [`Self::from_slice`], [`Self::simd_from`], [`Self::from_fn`], and [`Self::block_splat`] methods.\n\n```rust\n# use fearless_simd::{prelude::*, f32x8};\nfn construct_simd(simd: S) {\n // From a single scalar value:\n let a = f32x8::splat(simd, 1.0);\n let b = f32x8::simd_from(simd, 1.0);\n\n // From a slice:\n let c = f32x8::from_slice(simd, &[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]);\n\n // From an array:\n let d = f32x8::simd_from(simd, [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]);\n\n // From an element-wise function:\n let e = f32x8::from_fn(simd, |i| i as f32);\n # use fearless_simd::f32x4;\n // From `Self::Block`:\n let f = f32x8::block_splat(f32x4::simd_from(simd, [1.0, 2.0, 3.0, 4.0]));\n}\n```"] #[derive(Clone, Copy)] #[repr(C, align(32))] @@ -2896,12 +2580,11 @@ impl crate::SimdCombine for u8x32 { self.simd.combine_u8x32(self, rhs.simd_into(self.simd)) } } -#[doc = "A SIMD mask of 32 8-bit elements.\n\nWhen created from a comparison operation, and as it should be used in a [`Self::select`] operation, each element will be all ones if it's \"true\", and all zeroes if it's \"false\"."] +#[doc = "A SIMD mask of 32 logical lanes corresponding to 8-bit vector elements.\n\nThe storage representation of this type is intentionally opaque. For compatibility with existing APIs, it may be converted to and from signed integer lanes where false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1)."] #[derive(Clone, Copy)] -#[repr(C, align(32))] pub struct mask8x32 { pub(crate) val: S::mask8x32, - pub simd: S, + pub(crate) simd: S, } impl Seal for mask8x32 {} impl SimdFrom<[i8; 32], S> for mask8x32 { @@ -2916,118 +2599,47 @@ impl From> for [i8; 32] { value.simd.as_array_mask8x32(value) } } -impl core::ops::Deref for mask8x32 { - type Target = [i8; 32]; - #[inline(always)] - fn deref(&self) -> &Self::Target { - self.simd.as_array_ref_mask8x32(self) - } -} -impl core::ops::DerefMut for mask8x32 { - #[inline(always)] - fn deref_mut(&mut self) -> &mut Self::Target { - self.simd.as_array_mut_mask8x32(self) - } -} impl core::fmt::Debug for mask8x32 { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - crate::support::simd_debug_impl( - f, - "mask8x32", - &self.simd, - self.simd.as_array_ref_mask8x32(self), - ) + let lanes = self.simd.as_array_mask8x32(*self); + crate::support::simd_debug_impl(f, "mask8x32", &self.simd, &lanes) } } -impl SimdFrom for mask8x32 { - #[inline(always)] - fn simd_from(simd: S, value: i8) -> Self { - simd.splat_mask8x32(value) - } -} -impl core::ops::Index for mask8x32 { - type Output = i8; - #[inline(always)] - fn index(&self, i: usize) -> &Self::Output { - &self.simd.as_array_ref_mask8x32(self)[i] - } -} -impl core::ops::IndexMut for mask8x32 { - #[inline(always)] - fn index_mut(&mut self, i: usize) -> &mut Self::Output { - &mut self.simd.as_array_mut_mask8x32(self)[i] - } -} -impl Select> for mask8x32 { - #[inline(always)] - fn select(self, if_true: mask8x32, if_false: mask8x32) -> mask8x32 { - self.simd.select_mask8x32(self, if_true, if_false) - } -} -impl Bytes for mask8x32 { - type Bytes = u8x32; - #[inline(always)] - fn to_bytes(self) -> Self::Bytes { - self.simd.cvt_to_bytes_mask8x32(self) - } - #[inline(always)] - fn from_bytes(value: Self::Bytes) -> Self { - value.simd.cvt_from_bytes_mask8x32(value) - } -} -impl SimdBase for mask8x32 { - type Element = i8; - const N: usize = 32; - type Mask = mask8x32; - type Block = mask8x16; - type Array = [i8; 32]; - #[inline(always)] - fn witness(&self) -> S { - self.simd - } - #[inline(always)] - fn as_slice(&self) -> &[i8] { - self.simd.as_array_ref_mask8x32(self).as_slice() - } +impl SimdFrom for mask8x32 { #[inline(always)] - fn as_mut_slice(&mut self) -> &mut [i8] { - self.simd.as_array_mut_mask8x32(self).as_mut_slice() + fn simd_from(simd: S, value: bool) -> Self { + simd.splat_mask8x32(value) } +} +impl Select> for mask8x32 { #[inline(always)] - fn from_slice(simd: S, slice: &[i8]) -> Self { - simd.load_array_ref_mask8x32(slice.try_into().unwrap()) + fn select(self, if_true: mask8x32, if_false: mask8x32) -> mask8x32 { + self.simd.select_mask8x32(self, if_true, if_false) } +} +impl crate::SimdMask for mask8x32 { + type Element = i8; + const N: usize = 32; #[inline(always)] - fn store_slice(&self, slice: &mut [i8]) { + fn witness(&self) -> S { self.simd - .store_array_mask8x32(*self, slice.try_into().unwrap()); } #[inline(always)] - fn splat(simd: S, val: i8) -> Self { + fn splat(simd: S, val: bool) -> Self { simd.splat_mask8x32(val) } #[inline(always)] - fn block_splat(block: Self::Block) -> Self { - block.simd.combine_mask8x16(block, block) - } - #[inline(always)] - fn from_fn(simd: S, f: impl FnMut(usize) -> i8) -> Self { - simd.load_array_mask8x32(core::array::from_fn(f)) - } - #[inline(always)] - fn slide(self, rhs: impl SimdInto) -> Self { - self.simd - .slide_mask8x32::(self, rhs.simd_into(self.simd)) + fn from_slice(simd: S, slice: &[i8]) -> Self { + let slice: &[i8; 32] = slice.try_into().unwrap(); + simd.load_array_mask8x32(*slice) } #[inline(always)] - fn slide_within_blocks(self, rhs: impl SimdInto) -> Self { - self.simd - .slide_within_blocks_mask8x32::(self, rhs.simd_into(self.simd)) + fn store_slice(&self, slice: &mut [i8]) { + let slice: &mut [i8; 32] = slice.try_into().unwrap(); + *slice = self.simd.as_array_mask8x32(*self); } -} -impl crate::SimdMask for mask8x32 { #[inline(always)] - fn simd_eq(self, rhs: impl SimdInto) -> Self::Mask { + fn simd_eq(self, rhs: impl SimdInto) -> Self { self.simd.simd_eq_mask8x32(self, rhs.simd_into(self.simd)) } #[inline(always)] @@ -3047,20 +2659,6 @@ impl crate::SimdMask for mask8x32 { self.simd.all_false_mask8x32(self) } } -impl crate::SimdSplit for mask8x32 { - type Split = mask8x16; - #[inline(always)] - fn split(self) -> (Self::Split, Self::Split) { - self.simd.split_mask8x32(self) - } -} -impl crate::SimdCombine for mask8x32 { - type Combined = mask8x64; - #[inline(always)] - fn combine(self, rhs: impl SimdInto) -> Self::Combined { - self.simd.combine_mask8x32(self, rhs.simd_into(self.simd)) - } -} #[doc = "A SIMD vector of 16 [`i16`] elements.\n\nYou may construct this vector type using the [`Self::splat`], [`Self::from_slice`], [`Self::simd_from`], [`Self::from_fn`], and [`Self::block_splat`] methods.\n\n```rust\n# use fearless_simd::{prelude::*, i16x16};\nfn construct_simd(simd: S) {\n // From a single scalar value:\n let a = i16x16::splat(simd, 1);\n let b = i16x16::simd_from(simd, 1);\n\n // From a slice:\n let c = i16x16::from_slice(simd, &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]);\n\n // From an array:\n let d = i16x16::simd_from(simd, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]);\n\n // From an element-wise function:\n let e = i16x16::from_fn(simd, |i| i as i16);\n # use fearless_simd::i16x8;\n // From `Self::Block`:\n let f = i16x16::block_splat(i16x8::simd_from(simd, [1, 2, 3, 4, 5, 6, 7, 8]));\n}\n```"] #[derive(Clone, Copy)] #[repr(C, align(32))] @@ -3457,12 +3055,11 @@ impl crate::SimdCombine for u16x16 { self.simd.combine_u16x16(self, rhs.simd_into(self.simd)) } } -#[doc = "A SIMD mask of 16 16-bit elements.\n\nWhen created from a comparison operation, and as it should be used in a [`Self::select`] operation, each element will be all ones if it's \"true\", and all zeroes if it's \"false\"."] +#[doc = "A SIMD mask of 16 logical lanes corresponding to 16-bit vector elements.\n\nThe storage representation of this type is intentionally opaque. For compatibility with existing APIs, it may be converted to and from signed integer lanes where false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1)."] #[derive(Clone, Copy)] -#[repr(C, align(32))] pub struct mask16x16 { pub(crate) val: S::mask16x16, - pub simd: S, + pub(crate) simd: S, } impl Seal for mask16x16 {} impl SimdFrom<[i16; 16], S> for mask16x16 { @@ -3477,118 +3074,47 @@ impl From> for [i16; 16] { value.simd.as_array_mask16x16(value) } } -impl core::ops::Deref for mask16x16 { - type Target = [i16; 16]; - #[inline(always)] - fn deref(&self) -> &Self::Target { - self.simd.as_array_ref_mask16x16(self) - } -} -impl core::ops::DerefMut for mask16x16 { - #[inline(always)] - fn deref_mut(&mut self) -> &mut Self::Target { - self.simd.as_array_mut_mask16x16(self) - } -} impl core::fmt::Debug for mask16x16 { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - crate::support::simd_debug_impl( - f, - "mask16x16", - &self.simd, - self.simd.as_array_ref_mask16x16(self), - ) + let lanes = self.simd.as_array_mask16x16(*self); + crate::support::simd_debug_impl(f, "mask16x16", &self.simd, &lanes) } } -impl SimdFrom for mask16x16 { +impl SimdFrom for mask16x16 { #[inline(always)] - fn simd_from(simd: S, value: i16) -> Self { + fn simd_from(simd: S, value: bool) -> Self { simd.splat_mask16x16(value) } } -impl core::ops::Index for mask16x16 { - type Output = i16; - #[inline(always)] - fn index(&self, i: usize) -> &Self::Output { - &self.simd.as_array_ref_mask16x16(self)[i] - } -} -impl core::ops::IndexMut for mask16x16 { - #[inline(always)] - fn index_mut(&mut self, i: usize) -> &mut Self::Output { - &mut self.simd.as_array_mut_mask16x16(self)[i] - } -} impl Select> for mask16x16 { #[inline(always)] fn select(self, if_true: mask16x16, if_false: mask16x16) -> mask16x16 { self.simd.select_mask16x16(self, if_true, if_false) } } -impl Bytes for mask16x16 { - type Bytes = u8x32; - #[inline(always)] - fn to_bytes(self) -> Self::Bytes { - self.simd.cvt_to_bytes_mask16x16(self) - } - #[inline(always)] - fn from_bytes(value: Self::Bytes) -> Self { - value.simd.cvt_from_bytes_mask16x16(value) - } -} -impl SimdBase for mask16x16 { +impl crate::SimdMask for mask16x16 { type Element = i16; const N: usize = 16; - type Mask = mask16x16; - type Block = mask16x8; - type Array = [i16; 16]; #[inline(always)] fn witness(&self) -> S { self.simd } #[inline(always)] - fn as_slice(&self) -> &[i16] { - self.simd.as_array_ref_mask16x16(self).as_slice() - } - #[inline(always)] - fn as_mut_slice(&mut self) -> &mut [i16] { - self.simd.as_array_mut_mask16x16(self).as_mut_slice() + fn splat(simd: S, val: bool) -> Self { + simd.splat_mask16x16(val) } #[inline(always)] fn from_slice(simd: S, slice: &[i16]) -> Self { - simd.load_array_ref_mask16x16(slice.try_into().unwrap()) + let slice: &[i16; 16] = slice.try_into().unwrap(); + simd.load_array_mask16x16(*slice) } #[inline(always)] fn store_slice(&self, slice: &mut [i16]) { - self.simd - .store_array_mask16x16(*self, slice.try_into().unwrap()); - } - #[inline(always)] - fn splat(simd: S, val: i16) -> Self { - simd.splat_mask16x16(val) - } - #[inline(always)] - fn block_splat(block: Self::Block) -> Self { - block.simd.combine_mask16x8(block, block) - } - #[inline(always)] - fn from_fn(simd: S, f: impl FnMut(usize) -> i16) -> Self { - simd.load_array_mask16x16(core::array::from_fn(f)) - } - #[inline(always)] - fn slide(self, rhs: impl SimdInto) -> Self { - self.simd - .slide_mask16x16::(self, rhs.simd_into(self.simd)) - } - #[inline(always)] - fn slide_within_blocks(self, rhs: impl SimdInto) -> Self { - self.simd - .slide_within_blocks_mask16x16::(self, rhs.simd_into(self.simd)) + let slice: &mut [i16; 16] = slice.try_into().unwrap(); + *slice = self.simd.as_array_mask16x16(*self); } -} -impl crate::SimdMask for mask16x16 { #[inline(always)] - fn simd_eq(self, rhs: impl SimdInto) -> Self::Mask { + fn simd_eq(self, rhs: impl SimdInto) -> Self { self.simd.simd_eq_mask16x16(self, rhs.simd_into(self.simd)) } #[inline(always)] @@ -3608,20 +3134,6 @@ impl crate::SimdMask for mask16x16 { self.simd.all_false_mask16x16(self) } } -impl crate::SimdSplit for mask16x16 { - type Split = mask16x8; - #[inline(always)] - fn split(self) -> (Self::Split, Self::Split) { - self.simd.split_mask16x16(self) - } -} -impl crate::SimdCombine for mask16x16 { - type Combined = mask16x32; - #[inline(always)] - fn combine(self, rhs: impl SimdInto) -> Self::Combined { - self.simd.combine_mask16x16(self, rhs.simd_into(self.simd)) - } -} #[doc = "A SIMD vector of 8 [`i32`] elements.\n\nYou may construct this vector type using the [`Self::splat`], [`Self::from_slice`], [`Self::simd_from`], [`Self::from_fn`], and [`Self::block_splat`] methods.\n\n```rust\n# use fearless_simd::{prelude::*, i32x8};\nfn construct_simd(simd: S) {\n // From a single scalar value:\n let a = i32x8::splat(simd, 1);\n let b = i32x8::simd_from(simd, 1);\n\n // From a slice:\n let c = i32x8::from_slice(simd, &[1, 2, 3, 4, 5, 6, 7, 8]);\n\n // From an array:\n let d = i32x8::simd_from(simd, [1, 2, 3, 4, 5, 6, 7, 8]);\n\n // From an element-wise function:\n let e = i32x8::from_fn(simd, |i| i as i32);\n # use fearless_simd::i32x4;\n // From `Self::Block`:\n let f = i32x8::block_splat(i32x4::simd_from(simd, [1, 2, 3, 4]));\n}\n```"] #[derive(Clone, Copy)] #[repr(C, align(32))] @@ -4030,12 +3542,11 @@ impl crate::SimdCombine for u32x8 { self.simd.combine_u32x8(self, rhs.simd_into(self.simd)) } } -#[doc = "A SIMD mask of 8 32-bit elements.\n\nWhen created from a comparison operation, and as it should be used in a [`Self::select`] operation, each element will be all ones if it's \"true\", and all zeroes if it's \"false\"."] +#[doc = "A SIMD mask of 8 logical lanes corresponding to 32-bit vector elements.\n\nThe storage representation of this type is intentionally opaque. For compatibility with existing APIs, it may be converted to and from signed integer lanes where false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1)."] #[derive(Clone, Copy)] -#[repr(C, align(32))] pub struct mask32x8 { pub(crate) val: S::mask32x8, - pub simd: S, + pub(crate) simd: S, } impl Seal for mask32x8 {} impl SimdFrom<[i32; 8], S> for mask32x8 { @@ -4050,118 +3561,47 @@ impl From> for [i32; 8] { value.simd.as_array_mask32x8(value) } } -impl core::ops::Deref for mask32x8 { - type Target = [i32; 8]; - #[inline(always)] - fn deref(&self) -> &Self::Target { - self.simd.as_array_ref_mask32x8(self) - } -} -impl core::ops::DerefMut for mask32x8 { - #[inline(always)] - fn deref_mut(&mut self) -> &mut Self::Target { - self.simd.as_array_mut_mask32x8(self) - } -} impl core::fmt::Debug for mask32x8 { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - crate::support::simd_debug_impl( - f, - "mask32x8", - &self.simd, - self.simd.as_array_ref_mask32x8(self), - ) + let lanes = self.simd.as_array_mask32x8(*self); + crate::support::simd_debug_impl(f, "mask32x8", &self.simd, &lanes) } } -impl SimdFrom for mask32x8 { +impl SimdFrom for mask32x8 { #[inline(always)] - fn simd_from(simd: S, value: i32) -> Self { + fn simd_from(simd: S, value: bool) -> Self { simd.splat_mask32x8(value) } } -impl core::ops::Index for mask32x8 { - type Output = i32; - #[inline(always)] - fn index(&self, i: usize) -> &Self::Output { - &self.simd.as_array_ref_mask32x8(self)[i] - } -} -impl core::ops::IndexMut for mask32x8 { - #[inline(always)] - fn index_mut(&mut self, i: usize) -> &mut Self::Output { - &mut self.simd.as_array_mut_mask32x8(self)[i] - } -} impl Select> for mask32x8 { #[inline(always)] fn select(self, if_true: mask32x8, if_false: mask32x8) -> mask32x8 { self.simd.select_mask32x8(self, if_true, if_false) } } -impl Bytes for mask32x8 { - type Bytes = u8x32; - #[inline(always)] - fn to_bytes(self) -> Self::Bytes { - self.simd.cvt_to_bytes_mask32x8(self) - } - #[inline(always)] - fn from_bytes(value: Self::Bytes) -> Self { - value.simd.cvt_from_bytes_mask32x8(value) - } -} -impl SimdBase for mask32x8 { +impl crate::SimdMask for mask32x8 { type Element = i32; const N: usize = 8; - type Mask = mask32x8; - type Block = mask32x4; - type Array = [i32; 8]; #[inline(always)] fn witness(&self) -> S { self.simd } #[inline(always)] - fn as_slice(&self) -> &[i32] { - self.simd.as_array_ref_mask32x8(self).as_slice() - } - #[inline(always)] - fn as_mut_slice(&mut self) -> &mut [i32] { - self.simd.as_array_mut_mask32x8(self).as_mut_slice() + fn splat(simd: S, val: bool) -> Self { + simd.splat_mask32x8(val) } #[inline(always)] fn from_slice(simd: S, slice: &[i32]) -> Self { - simd.load_array_ref_mask32x8(slice.try_into().unwrap()) + let slice: &[i32; 8] = slice.try_into().unwrap(); + simd.load_array_mask32x8(*slice) } #[inline(always)] fn store_slice(&self, slice: &mut [i32]) { - self.simd - .store_array_mask32x8(*self, slice.try_into().unwrap()); - } - #[inline(always)] - fn splat(simd: S, val: i32) -> Self { - simd.splat_mask32x8(val) - } - #[inline(always)] - fn block_splat(block: Self::Block) -> Self { - block.simd.combine_mask32x4(block, block) - } - #[inline(always)] - fn from_fn(simd: S, f: impl FnMut(usize) -> i32) -> Self { - simd.load_array_mask32x8(core::array::from_fn(f)) + let slice: &mut [i32; 8] = slice.try_into().unwrap(); + *slice = self.simd.as_array_mask32x8(*self); } #[inline(always)] - fn slide(self, rhs: impl SimdInto) -> Self { - self.simd - .slide_mask32x8::(self, rhs.simd_into(self.simd)) - } - #[inline(always)] - fn slide_within_blocks(self, rhs: impl SimdInto) -> Self { - self.simd - .slide_within_blocks_mask32x8::(self, rhs.simd_into(self.simd)) - } -} -impl crate::SimdMask for mask32x8 { - #[inline(always)] - fn simd_eq(self, rhs: impl SimdInto) -> Self::Mask { + fn simd_eq(self, rhs: impl SimdInto) -> Self { self.simd.simd_eq_mask32x8(self, rhs.simd_into(self.simd)) } #[inline(always)] @@ -4181,20 +3621,6 @@ impl crate::SimdMask for mask32x8 { self.simd.all_false_mask32x8(self) } } -impl crate::SimdSplit for mask32x8 { - type Split = mask32x4; - #[inline(always)] - fn split(self) -> (Self::Split, Self::Split) { - self.simd.split_mask32x8(self) - } -} -impl crate::SimdCombine for mask32x8 { - type Combined = mask32x16; - #[inline(always)] - fn combine(self, rhs: impl SimdInto) -> Self::Combined { - self.simd.combine_mask32x8(self, rhs.simd_into(self.simd)) - } -} #[doc = "A SIMD vector of 4 [`f64`] elements.\n\nYou may construct this vector type using the [`Self::splat`], [`Self::from_slice`], [`Self::simd_from`], [`Self::from_fn`], and [`Self::block_splat`] methods.\n\n```rust\n# use fearless_simd::{prelude::*, f64x4};\nfn construct_simd(simd: S) {\n // From a single scalar value:\n let a = f64x4::splat(simd, 1.0);\n let b = f64x4::simd_from(simd, 1.0);\n\n // From a slice:\n let c = f64x4::from_slice(simd, &[1.0, 2.0, 3.0, 4.0]);\n\n // From an array:\n let d = f64x4::simd_from(simd, [1.0, 2.0, 3.0, 4.0]);\n\n // From an element-wise function:\n let e = f64x4::from_fn(simd, |i| i as f64);\n # use fearless_simd::f64x2;\n // From `Self::Block`:\n let f = f64x4::block_splat(f64x2::simd_from(simd, [1.0, 2.0]));\n}\n```"] #[derive(Clone, Copy)] #[repr(C, align(32))] @@ -4437,12 +3863,11 @@ impl crate::SimdCombine for f64x4 { self.simd.combine_f64x4(self, rhs.simd_into(self.simd)) } } -#[doc = "A SIMD mask of 4 64-bit elements.\n\nWhen created from a comparison operation, and as it should be used in a [`Self::select`] operation, each element will be all ones if it's \"true\", and all zeroes if it's \"false\"."] +#[doc = "A SIMD mask of 4 logical lanes corresponding to 64-bit vector elements.\n\nThe storage representation of this type is intentionally opaque. For compatibility with existing APIs, it may be converted to and from signed integer lanes where false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1)."] #[derive(Clone, Copy)] -#[repr(C, align(32))] pub struct mask64x4 { pub(crate) val: S::mask64x4, - pub simd: S, + pub(crate) simd: S, } impl Seal for mask64x4 {} impl SimdFrom<[i64; 4], S> for mask64x4 { @@ -4457,118 +3882,47 @@ impl From> for [i64; 4] { value.simd.as_array_mask64x4(value) } } -impl core::ops::Deref for mask64x4 { - type Target = [i64; 4]; - #[inline(always)] - fn deref(&self) -> &Self::Target { - self.simd.as_array_ref_mask64x4(self) - } -} -impl core::ops::DerefMut for mask64x4 { - #[inline(always)] - fn deref_mut(&mut self) -> &mut Self::Target { - self.simd.as_array_mut_mask64x4(self) - } -} impl core::fmt::Debug for mask64x4 { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - crate::support::simd_debug_impl( - f, - "mask64x4", - &self.simd, - self.simd.as_array_ref_mask64x4(self), - ) + let lanes = self.simd.as_array_mask64x4(*self); + crate::support::simd_debug_impl(f, "mask64x4", &self.simd, &lanes) } } -impl SimdFrom for mask64x4 { +impl SimdFrom for mask64x4 { #[inline(always)] - fn simd_from(simd: S, value: i64) -> Self { + fn simd_from(simd: S, value: bool) -> Self { simd.splat_mask64x4(value) } } -impl core::ops::Index for mask64x4 { - type Output = i64; - #[inline(always)] - fn index(&self, i: usize) -> &Self::Output { - &self.simd.as_array_ref_mask64x4(self)[i] - } -} -impl core::ops::IndexMut for mask64x4 { - #[inline(always)] - fn index_mut(&mut self, i: usize) -> &mut Self::Output { - &mut self.simd.as_array_mut_mask64x4(self)[i] - } -} impl Select> for mask64x4 { #[inline(always)] fn select(self, if_true: mask64x4, if_false: mask64x4) -> mask64x4 { self.simd.select_mask64x4(self, if_true, if_false) } } -impl Bytes for mask64x4 { - type Bytes = u8x32; - #[inline(always)] - fn to_bytes(self) -> Self::Bytes { - self.simd.cvt_to_bytes_mask64x4(self) - } - #[inline(always)] - fn from_bytes(value: Self::Bytes) -> Self { - value.simd.cvt_from_bytes_mask64x4(value) - } -} -impl SimdBase for mask64x4 { +impl crate::SimdMask for mask64x4 { type Element = i64; const N: usize = 4; - type Mask = mask64x4; - type Block = mask64x2; - type Array = [i64; 4]; #[inline(always)] fn witness(&self) -> S { self.simd } #[inline(always)] - fn as_slice(&self) -> &[i64] { - self.simd.as_array_ref_mask64x4(self).as_slice() - } - #[inline(always)] - fn as_mut_slice(&mut self) -> &mut [i64] { - self.simd.as_array_mut_mask64x4(self).as_mut_slice() + fn splat(simd: S, val: bool) -> Self { + simd.splat_mask64x4(val) } #[inline(always)] fn from_slice(simd: S, slice: &[i64]) -> Self { - simd.load_array_ref_mask64x4(slice.try_into().unwrap()) + let slice: &[i64; 4] = slice.try_into().unwrap(); + simd.load_array_mask64x4(*slice) } #[inline(always)] fn store_slice(&self, slice: &mut [i64]) { - self.simd - .store_array_mask64x4(*self, slice.try_into().unwrap()); - } - #[inline(always)] - fn splat(simd: S, val: i64) -> Self { - simd.splat_mask64x4(val) - } - #[inline(always)] - fn block_splat(block: Self::Block) -> Self { - block.simd.combine_mask64x2(block, block) - } - #[inline(always)] - fn from_fn(simd: S, f: impl FnMut(usize) -> i64) -> Self { - simd.load_array_mask64x4(core::array::from_fn(f)) - } - #[inline(always)] - fn slide(self, rhs: impl SimdInto) -> Self { - self.simd - .slide_mask64x4::(self, rhs.simd_into(self.simd)) - } - #[inline(always)] - fn slide_within_blocks(self, rhs: impl SimdInto) -> Self { - self.simd - .slide_within_blocks_mask64x4::(self, rhs.simd_into(self.simd)) + let slice: &mut [i64; 4] = slice.try_into().unwrap(); + *slice = self.simd.as_array_mask64x4(*self); } -} -impl crate::SimdMask for mask64x4 { #[inline(always)] - fn simd_eq(self, rhs: impl SimdInto) -> Self::Mask { + fn simd_eq(self, rhs: impl SimdInto) -> Self { self.simd.simd_eq_mask64x4(self, rhs.simd_into(self.simd)) } #[inline(always)] @@ -4588,20 +3942,6 @@ impl crate::SimdMask for mask64x4 { self.simd.all_false_mask64x4(self) } } -impl crate::SimdSplit for mask64x4 { - type Split = mask64x2; - #[inline(always)] - fn split(self) -> (Self::Split, Self::Split) { - self.simd.split_mask64x4(self) - } -} -impl crate::SimdCombine for mask64x4 { - type Combined = mask64x8; - #[inline(always)] - fn combine(self, rhs: impl SimdInto) -> Self::Combined { - self.simd.combine_mask64x4(self, rhs.simd_into(self.simd)) - } -} #[doc = "A SIMD vector of 16 [`f32`] elements.\n\nYou may construct this vector type using the [`Self::splat`], [`Self::from_slice`], [`Self::simd_from`], [`Self::from_fn`], and [`Self::block_splat`] methods.\n\n```rust\n# use fearless_simd::{prelude::*, f32x16};\nfn construct_simd(simd: S) {\n // From a single scalar value:\n let a = f32x16::splat(simd, 1.0);\n let b = f32x16::simd_from(simd, 1.0);\n\n // From a slice:\n let c = f32x16::from_slice(simd, &[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0]);\n\n // From an array:\n let d = f32x16::simd_from(simd, [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0]);\n\n // From an element-wise function:\n let e = f32x16::from_fn(simd, |i| i as f32);\n # use fearless_simd::f32x4;\n // From `Self::Block`:\n let f = f32x16::block_splat(f32x4::simd_from(simd, [1.0, 2.0, 3.0, 4.0]));\n}\n```"] #[derive(Clone, Copy)] #[repr(C, align(64))] @@ -5230,139 +4570,66 @@ impl crate::SimdSplit for u8x64 { self.simd.split_u8x64(self) } } -#[doc = "A SIMD mask of 64 8-bit elements.\n\nWhen created from a comparison operation, and as it should be used in a [`Self::select`] operation, each element will be all ones if it's \"true\", and all zeroes if it's \"false\"."] +#[doc = "A SIMD mask of 64 logical lanes corresponding to 8-bit vector elements.\n\nThe storage representation of this type is intentionally opaque. For compatibility with existing APIs, it may be converted to and from signed integer lanes where false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1)."] #[derive(Clone, Copy)] -#[repr(C, align(64))] pub struct mask8x64 { pub(crate) val: S::mask8x64, - pub simd: S, + pub(crate) simd: S, } impl Seal for mask8x64 {} impl SimdFrom<[i8; 64], S> for mask8x64 { #[inline(always)] - fn simd_from(simd: S, val: [i8; 64]) -> Self { - simd.load_array_mask8x64(val) - } -} -impl From> for [i8; 64] { - #[inline(always)] - fn from(value: mask8x64) -> Self { - value.simd.as_array_mask8x64(value) - } -} -impl core::ops::Deref for mask8x64 { - type Target = [i8; 64]; - #[inline(always)] - fn deref(&self) -> &Self::Target { - self.simd.as_array_ref_mask8x64(self) + fn simd_from(simd: S, val: [i8; 64]) -> Self { + simd.load_array_mask8x64(val) } } -impl core::ops::DerefMut for mask8x64 { +impl From> for [i8; 64] { #[inline(always)] - fn deref_mut(&mut self) -> &mut Self::Target { - self.simd.as_array_mut_mask8x64(self) + fn from(value: mask8x64) -> Self { + value.simd.as_array_mask8x64(value) } } impl core::fmt::Debug for mask8x64 { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - crate::support::simd_debug_impl( - f, - "mask8x64", - &self.simd, - self.simd.as_array_ref_mask8x64(self), - ) + let lanes = self.simd.as_array_mask8x64(*self); + crate::support::simd_debug_impl(f, "mask8x64", &self.simd, &lanes) } } -impl SimdFrom for mask8x64 { +impl SimdFrom for mask8x64 { #[inline(always)] - fn simd_from(simd: S, value: i8) -> Self { + fn simd_from(simd: S, value: bool) -> Self { simd.splat_mask8x64(value) } } -impl core::ops::Index for mask8x64 { - type Output = i8; - #[inline(always)] - fn index(&self, i: usize) -> &Self::Output { - &self.simd.as_array_ref_mask8x64(self)[i] - } -} -impl core::ops::IndexMut for mask8x64 { - #[inline(always)] - fn index_mut(&mut self, i: usize) -> &mut Self::Output { - &mut self.simd.as_array_mut_mask8x64(self)[i] - } -} impl Select> for mask8x64 { #[inline(always)] fn select(self, if_true: mask8x64, if_false: mask8x64) -> mask8x64 { self.simd.select_mask8x64(self, if_true, if_false) } } -impl Bytes for mask8x64 { - type Bytes = u8x64; - #[inline(always)] - fn to_bytes(self) -> Self::Bytes { - self.simd.cvt_to_bytes_mask8x64(self) - } - #[inline(always)] - fn from_bytes(value: Self::Bytes) -> Self { - value.simd.cvt_from_bytes_mask8x64(value) - } -} -impl SimdBase for mask8x64 { +impl crate::SimdMask for mask8x64 { type Element = i8; const N: usize = 64; - type Mask = mask8x64; - type Block = mask8x16; - type Array = [i8; 64]; #[inline(always)] fn witness(&self) -> S { self.simd } #[inline(always)] - fn as_slice(&self) -> &[i8] { - self.simd.as_array_ref_mask8x64(self).as_slice() - } - #[inline(always)] - fn as_mut_slice(&mut self) -> &mut [i8] { - self.simd.as_array_mut_mask8x64(self).as_mut_slice() + fn splat(simd: S, val: bool) -> Self { + simd.splat_mask8x64(val) } #[inline(always)] fn from_slice(simd: S, slice: &[i8]) -> Self { - simd.load_array_ref_mask8x64(slice.try_into().unwrap()) + let slice: &[i8; 64] = slice.try_into().unwrap(); + simd.load_array_mask8x64(*slice) } #[inline(always)] fn store_slice(&self, slice: &mut [i8]) { - self.simd - .store_array_mask8x64(*self, slice.try_into().unwrap()); - } - #[inline(always)] - fn splat(simd: S, val: i8) -> Self { - simd.splat_mask8x64(val) - } - #[inline(always)] - fn block_splat(block: Self::Block) -> Self { - let block2 = block.simd.combine_mask8x16(block, block); - block2.simd.combine_mask8x32(block2, block2) - } - #[inline(always)] - fn from_fn(simd: S, f: impl FnMut(usize) -> i8) -> Self { - simd.load_array_mask8x64(core::array::from_fn(f)) + let slice: &mut [i8; 64] = slice.try_into().unwrap(); + *slice = self.simd.as_array_mask8x64(*self); } #[inline(always)] - fn slide(self, rhs: impl SimdInto) -> Self { - self.simd - .slide_mask8x64::(self, rhs.simd_into(self.simd)) - } - #[inline(always)] - fn slide_within_blocks(self, rhs: impl SimdInto) -> Self { - self.simd - .slide_within_blocks_mask8x64::(self, rhs.simd_into(self.simd)) - } -} -impl crate::SimdMask for mask8x64 { - #[inline(always)] - fn simd_eq(self, rhs: impl SimdInto) -> Self::Mask { + fn simd_eq(self, rhs: impl SimdInto) -> Self { self.simd.simd_eq_mask8x64(self, rhs.simd_into(self.simd)) } #[inline(always)] @@ -5382,13 +4649,6 @@ impl crate::SimdMask for mask8x64 { self.simd.all_false_mask8x64(self) } } -impl crate::SimdSplit for mask8x64 { - type Split = mask8x32; - #[inline(always)] - fn split(self) -> (Self::Split, Self::Split) { - self.simd.split_mask8x64(self) - } -} #[doc = "A SIMD vector of 32 [`i16`] elements.\n\nYou may construct this vector type using the [`Self::splat`], [`Self::from_slice`], [`Self::simd_from`], [`Self::from_fn`], and [`Self::block_splat`] methods.\n\n```rust\n# use fearless_simd::{prelude::*, i16x32};\nfn construct_simd(simd: S) {\n // From a single scalar value:\n let a = i16x32::splat(simd, 1);\n let b = i16x32::simd_from(simd, 1);\n\n // From a slice:\n let c = i16x32::from_slice(simd, &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]);\n\n // From an array:\n let d = i16x32::simd_from(simd, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]);\n\n // From an element-wise function:\n let e = i16x32::from_fn(simd, |i| i as i16);\n # use fearless_simd::i16x8;\n // From `Self::Block`:\n let f = i16x32::block_splat(i16x8::simd_from(simd, [1, 2, 3, 4, 5, 6, 7, 8]));\n}\n```"] #[derive(Clone, Copy)] #[repr(C, align(64))] @@ -5773,12 +5033,11 @@ impl crate::SimdSplit for u16x32 { self.simd.split_u16x32(self) } } -#[doc = "A SIMD mask of 32 16-bit elements.\n\nWhen created from a comparison operation, and as it should be used in a [`Self::select`] operation, each element will be all ones if it's \"true\", and all zeroes if it's \"false\"."] +#[doc = "A SIMD mask of 32 logical lanes corresponding to 16-bit vector elements.\n\nThe storage representation of this type is intentionally opaque. For compatibility with existing APIs, it may be converted to and from signed integer lanes where false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1)."] #[derive(Clone, Copy)] -#[repr(C, align(64))] pub struct mask16x32 { pub(crate) val: S::mask16x32, - pub simd: S, + pub(crate) simd: S, } impl Seal for mask16x32 {} impl SimdFrom<[i16; 32], S> for mask16x32 { @@ -5793,119 +5052,47 @@ impl From> for [i16; 32] { value.simd.as_array_mask16x32(value) } } -impl core::ops::Deref for mask16x32 { - type Target = [i16; 32]; - #[inline(always)] - fn deref(&self) -> &Self::Target { - self.simd.as_array_ref_mask16x32(self) - } -} -impl core::ops::DerefMut for mask16x32 { - #[inline(always)] - fn deref_mut(&mut self) -> &mut Self::Target { - self.simd.as_array_mut_mask16x32(self) - } -} impl core::fmt::Debug for mask16x32 { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - crate::support::simd_debug_impl( - f, - "mask16x32", - &self.simd, - self.simd.as_array_ref_mask16x32(self), - ) + let lanes = self.simd.as_array_mask16x32(*self); + crate::support::simd_debug_impl(f, "mask16x32", &self.simd, &lanes) } } -impl SimdFrom for mask16x32 { +impl SimdFrom for mask16x32 { #[inline(always)] - fn simd_from(simd: S, value: i16) -> Self { + fn simd_from(simd: S, value: bool) -> Self { simd.splat_mask16x32(value) } } -impl core::ops::Index for mask16x32 { - type Output = i16; - #[inline(always)] - fn index(&self, i: usize) -> &Self::Output { - &self.simd.as_array_ref_mask16x32(self)[i] - } -} -impl core::ops::IndexMut for mask16x32 { - #[inline(always)] - fn index_mut(&mut self, i: usize) -> &mut Self::Output { - &mut self.simd.as_array_mut_mask16x32(self)[i] - } -} impl Select> for mask16x32 { #[inline(always)] fn select(self, if_true: mask16x32, if_false: mask16x32) -> mask16x32 { self.simd.select_mask16x32(self, if_true, if_false) } } -impl Bytes for mask16x32 { - type Bytes = u8x64; - #[inline(always)] - fn to_bytes(self) -> Self::Bytes { - self.simd.cvt_to_bytes_mask16x32(self) - } - #[inline(always)] - fn from_bytes(value: Self::Bytes) -> Self { - value.simd.cvt_from_bytes_mask16x32(value) - } -} -impl SimdBase for mask16x32 { +impl crate::SimdMask for mask16x32 { type Element = i16; const N: usize = 32; - type Mask = mask16x32; - type Block = mask16x8; - type Array = [i16; 32]; #[inline(always)] fn witness(&self) -> S { self.simd } #[inline(always)] - fn as_slice(&self) -> &[i16] { - self.simd.as_array_ref_mask16x32(self).as_slice() - } - #[inline(always)] - fn as_mut_slice(&mut self) -> &mut [i16] { - self.simd.as_array_mut_mask16x32(self).as_mut_slice() + fn splat(simd: S, val: bool) -> Self { + simd.splat_mask16x32(val) } #[inline(always)] fn from_slice(simd: S, slice: &[i16]) -> Self { - simd.load_array_ref_mask16x32(slice.try_into().unwrap()) + let slice: &[i16; 32] = slice.try_into().unwrap(); + simd.load_array_mask16x32(*slice) } #[inline(always)] fn store_slice(&self, slice: &mut [i16]) { - self.simd - .store_array_mask16x32(*self, slice.try_into().unwrap()); - } - #[inline(always)] - fn splat(simd: S, val: i16) -> Self { - simd.splat_mask16x32(val) - } - #[inline(always)] - fn block_splat(block: Self::Block) -> Self { - let block2 = block.simd.combine_mask16x8(block, block); - block2.simd.combine_mask16x16(block2, block2) + let slice: &mut [i16; 32] = slice.try_into().unwrap(); + *slice = self.simd.as_array_mask16x32(*self); } #[inline(always)] - fn from_fn(simd: S, f: impl FnMut(usize) -> i16) -> Self { - simd.load_array_mask16x32(core::array::from_fn(f)) - } - #[inline(always)] - fn slide(self, rhs: impl SimdInto) -> Self { - self.simd - .slide_mask16x32::(self, rhs.simd_into(self.simd)) - } - #[inline(always)] - fn slide_within_blocks(self, rhs: impl SimdInto) -> Self { - self.simd - .slide_within_blocks_mask16x32::(self, rhs.simd_into(self.simd)) - } -} -impl crate::SimdMask for mask16x32 { - #[inline(always)] - fn simd_eq(self, rhs: impl SimdInto) -> Self::Mask { + fn simd_eq(self, rhs: impl SimdInto) -> Self { self.simd.simd_eq_mask16x32(self, rhs.simd_into(self.simd)) } #[inline(always)] @@ -5925,13 +5112,6 @@ impl crate::SimdMask for mask16x32 { self.simd.all_false_mask16x32(self) } } -impl crate::SimdSplit for mask16x32 { - type Split = mask16x16; - #[inline(always)] - fn split(self) -> (Self::Split, Self::Split) { - self.simd.split_mask16x32(self) - } -} #[doc = "A SIMD vector of 16 [`i32`] elements.\n\nYou may construct this vector type using the [`Self::splat`], [`Self::from_slice`], [`Self::simd_from`], [`Self::from_fn`], and [`Self::block_splat`] methods.\n\n```rust\n# use fearless_simd::{prelude::*, i32x16};\nfn construct_simd(simd: S) {\n // From a single scalar value:\n let a = i32x16::splat(simd, 1);\n let b = i32x16::simd_from(simd, 1);\n\n // From a slice:\n let c = i32x16::from_slice(simd, &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]);\n\n // From an array:\n let d = i32x16::simd_from(simd, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]);\n\n // From an element-wise function:\n let e = i32x16::from_fn(simd, |i| i as i32);\n # use fearless_simd::i32x4;\n // From `Self::Block`:\n let f = i32x16::block_splat(i32x4::simd_from(simd, [1, 2, 3, 4]));\n}\n```"] #[derive(Clone, Copy)] #[repr(C, align(64))] @@ -6340,12 +5520,11 @@ impl crate::SimdSplit for u32x16 { self.simd.split_u32x16(self) } } -#[doc = "A SIMD mask of 16 32-bit elements.\n\nWhen created from a comparison operation, and as it should be used in a [`Self::select`] operation, each element will be all ones if it's \"true\", and all zeroes if it's \"false\"."] +#[doc = "A SIMD mask of 16 logical lanes corresponding to 32-bit vector elements.\n\nThe storage representation of this type is intentionally opaque. For compatibility with existing APIs, it may be converted to and from signed integer lanes where false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1)."] #[derive(Clone, Copy)] -#[repr(C, align(64))] pub struct mask32x16 { pub(crate) val: S::mask32x16, - pub simd: S, + pub(crate) simd: S, } impl Seal for mask32x16 {} impl SimdFrom<[i32; 16], S> for mask32x16 { @@ -6360,119 +5539,47 @@ impl From> for [i32; 16] { value.simd.as_array_mask32x16(value) } } -impl core::ops::Deref for mask32x16 { - type Target = [i32; 16]; - #[inline(always)] - fn deref(&self) -> &Self::Target { - self.simd.as_array_ref_mask32x16(self) - } -} -impl core::ops::DerefMut for mask32x16 { - #[inline(always)] - fn deref_mut(&mut self) -> &mut Self::Target { - self.simd.as_array_mut_mask32x16(self) - } -} impl core::fmt::Debug for mask32x16 { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - crate::support::simd_debug_impl( - f, - "mask32x16", - &self.simd, - self.simd.as_array_ref_mask32x16(self), - ) + let lanes = self.simd.as_array_mask32x16(*self); + crate::support::simd_debug_impl(f, "mask32x16", &self.simd, &lanes) } } -impl SimdFrom for mask32x16 { +impl SimdFrom for mask32x16 { #[inline(always)] - fn simd_from(simd: S, value: i32) -> Self { + fn simd_from(simd: S, value: bool) -> Self { simd.splat_mask32x16(value) } } -impl core::ops::Index for mask32x16 { - type Output = i32; - #[inline(always)] - fn index(&self, i: usize) -> &Self::Output { - &self.simd.as_array_ref_mask32x16(self)[i] - } -} -impl core::ops::IndexMut for mask32x16 { - #[inline(always)] - fn index_mut(&mut self, i: usize) -> &mut Self::Output { - &mut self.simd.as_array_mut_mask32x16(self)[i] - } -} impl Select> for mask32x16 { #[inline(always)] fn select(self, if_true: mask32x16, if_false: mask32x16) -> mask32x16 { self.simd.select_mask32x16(self, if_true, if_false) } } -impl Bytes for mask32x16 { - type Bytes = u8x64; - #[inline(always)] - fn to_bytes(self) -> Self::Bytes { - self.simd.cvt_to_bytes_mask32x16(self) - } - #[inline(always)] - fn from_bytes(value: Self::Bytes) -> Self { - value.simd.cvt_from_bytes_mask32x16(value) - } -} -impl SimdBase for mask32x16 { +impl crate::SimdMask for mask32x16 { type Element = i32; const N: usize = 16; - type Mask = mask32x16; - type Block = mask32x4; - type Array = [i32; 16]; #[inline(always)] fn witness(&self) -> S { self.simd } #[inline(always)] - fn as_slice(&self) -> &[i32] { - self.simd.as_array_ref_mask32x16(self).as_slice() - } - #[inline(always)] - fn as_mut_slice(&mut self) -> &mut [i32] { - self.simd.as_array_mut_mask32x16(self).as_mut_slice() + fn splat(simd: S, val: bool) -> Self { + simd.splat_mask32x16(val) } #[inline(always)] fn from_slice(simd: S, slice: &[i32]) -> Self { - simd.load_array_ref_mask32x16(slice.try_into().unwrap()) + let slice: &[i32; 16] = slice.try_into().unwrap(); + simd.load_array_mask32x16(*slice) } #[inline(always)] fn store_slice(&self, slice: &mut [i32]) { - self.simd - .store_array_mask32x16(*self, slice.try_into().unwrap()); - } - #[inline(always)] - fn splat(simd: S, val: i32) -> Self { - simd.splat_mask32x16(val) + let slice: &mut [i32; 16] = slice.try_into().unwrap(); + *slice = self.simd.as_array_mask32x16(*self); } #[inline(always)] - fn block_splat(block: Self::Block) -> Self { - let block2 = block.simd.combine_mask32x4(block, block); - block2.simd.combine_mask32x8(block2, block2) - } - #[inline(always)] - fn from_fn(simd: S, f: impl FnMut(usize) -> i32) -> Self { - simd.load_array_mask32x16(core::array::from_fn(f)) - } - #[inline(always)] - fn slide(self, rhs: impl SimdInto) -> Self { - self.simd - .slide_mask32x16::(self, rhs.simd_into(self.simd)) - } - #[inline(always)] - fn slide_within_blocks(self, rhs: impl SimdInto) -> Self { - self.simd - .slide_within_blocks_mask32x16::(self, rhs.simd_into(self.simd)) - } -} -impl crate::SimdMask for mask32x16 { - #[inline(always)] - fn simd_eq(self, rhs: impl SimdInto) -> Self::Mask { + fn simd_eq(self, rhs: impl SimdInto) -> Self { self.simd.simd_eq_mask32x16(self, rhs.simd_into(self.simd)) } #[inline(always)] @@ -6492,13 +5599,6 @@ impl crate::SimdMask for mask32x16 { self.simd.all_false_mask32x16(self) } } -impl crate::SimdSplit for mask32x16 { - type Split = mask32x8; - #[inline(always)] - fn split(self) -> (Self::Split, Self::Split) { - self.simd.split_mask32x16(self) - } -} #[doc = "A SIMD vector of 8 [`f64`] elements.\n\nYou may construct this vector type using the [`Self::splat`], [`Self::from_slice`], [`Self::simd_from`], [`Self::from_fn`], and [`Self::block_splat`] methods.\n\n```rust\n# use fearless_simd::{prelude::*, f64x8};\nfn construct_simd(simd: S) {\n // From a single scalar value:\n let a = f64x8::splat(simd, 1.0);\n let b = f64x8::simd_from(simd, 1.0);\n\n // From a slice:\n let c = f64x8::from_slice(simd, &[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]);\n\n // From an array:\n let d = f64x8::simd_from(simd, [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]);\n\n // From an element-wise function:\n let e = f64x8::from_fn(simd, |i| i as f64);\n # use fearless_simd::f64x2;\n // From `Self::Block`:\n let f = f64x8::block_splat(f64x2::simd_from(simd, [1.0, 2.0]));\n}\n```"] #[derive(Clone, Copy)] #[repr(C, align(64))] @@ -6735,12 +5835,11 @@ impl crate::SimdSplit for f64x8 { self.simd.split_f64x8(self) } } -#[doc = "A SIMD mask of 8 64-bit elements.\n\nWhen created from a comparison operation, and as it should be used in a [`Self::select`] operation, each element will be all ones if it's \"true\", and all zeroes if it's \"false\"."] +#[doc = "A SIMD mask of 8 logical lanes corresponding to 64-bit vector elements.\n\nThe storage representation of this type is intentionally opaque. For compatibility with existing APIs, it may be converted to and from signed integer lanes where false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1)."] #[derive(Clone, Copy)] -#[repr(C, align(64))] pub struct mask64x8 { pub(crate) val: S::mask64x8, - pub simd: S, + pub(crate) simd: S, } impl Seal for mask64x8 {} impl SimdFrom<[i64; 8], S> for mask64x8 { @@ -6755,119 +5854,47 @@ impl From> for [i64; 8] { value.simd.as_array_mask64x8(value) } } -impl core::ops::Deref for mask64x8 { - type Target = [i64; 8]; - #[inline(always)] - fn deref(&self) -> &Self::Target { - self.simd.as_array_ref_mask64x8(self) - } -} -impl core::ops::DerefMut for mask64x8 { - #[inline(always)] - fn deref_mut(&mut self) -> &mut Self::Target { - self.simd.as_array_mut_mask64x8(self) - } -} impl core::fmt::Debug for mask64x8 { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - crate::support::simd_debug_impl( - f, - "mask64x8", - &self.simd, - self.simd.as_array_ref_mask64x8(self), - ) + let lanes = self.simd.as_array_mask64x8(*self); + crate::support::simd_debug_impl(f, "mask64x8", &self.simd, &lanes) } } -impl SimdFrom for mask64x8 { +impl SimdFrom for mask64x8 { #[inline(always)] - fn simd_from(simd: S, value: i64) -> Self { + fn simd_from(simd: S, value: bool) -> Self { simd.splat_mask64x8(value) } } -impl core::ops::Index for mask64x8 { - type Output = i64; - #[inline(always)] - fn index(&self, i: usize) -> &Self::Output { - &self.simd.as_array_ref_mask64x8(self)[i] - } -} -impl core::ops::IndexMut for mask64x8 { - #[inline(always)] - fn index_mut(&mut self, i: usize) -> &mut Self::Output { - &mut self.simd.as_array_mut_mask64x8(self)[i] - } -} impl Select> for mask64x8 { #[inline(always)] fn select(self, if_true: mask64x8, if_false: mask64x8) -> mask64x8 { self.simd.select_mask64x8(self, if_true, if_false) } } -impl Bytes for mask64x8 { - type Bytes = u8x64; - #[inline(always)] - fn to_bytes(self) -> Self::Bytes { - self.simd.cvt_to_bytes_mask64x8(self) - } - #[inline(always)] - fn from_bytes(value: Self::Bytes) -> Self { - value.simd.cvt_from_bytes_mask64x8(value) - } -} -impl SimdBase for mask64x8 { +impl crate::SimdMask for mask64x8 { type Element = i64; const N: usize = 8; - type Mask = mask64x8; - type Block = mask64x2; - type Array = [i64; 8]; #[inline(always)] fn witness(&self) -> S { self.simd } #[inline(always)] - fn as_slice(&self) -> &[i64] { - self.simd.as_array_ref_mask64x8(self).as_slice() - } - #[inline(always)] - fn as_mut_slice(&mut self) -> &mut [i64] { - self.simd.as_array_mut_mask64x8(self).as_mut_slice() + fn splat(simd: S, val: bool) -> Self { + simd.splat_mask64x8(val) } #[inline(always)] fn from_slice(simd: S, slice: &[i64]) -> Self { - simd.load_array_ref_mask64x8(slice.try_into().unwrap()) + let slice: &[i64; 8] = slice.try_into().unwrap(); + simd.load_array_mask64x8(*slice) } #[inline(always)] fn store_slice(&self, slice: &mut [i64]) { - self.simd - .store_array_mask64x8(*self, slice.try_into().unwrap()); - } - #[inline(always)] - fn splat(simd: S, val: i64) -> Self { - simd.splat_mask64x8(val) - } - #[inline(always)] - fn block_splat(block: Self::Block) -> Self { - let block2 = block.simd.combine_mask64x2(block, block); - block2.simd.combine_mask64x4(block2, block2) - } - #[inline(always)] - fn from_fn(simd: S, f: impl FnMut(usize) -> i64) -> Self { - simd.load_array_mask64x8(core::array::from_fn(f)) - } - #[inline(always)] - fn slide(self, rhs: impl SimdInto) -> Self { - self.simd - .slide_mask64x8::(self, rhs.simd_into(self.simd)) - } - #[inline(always)] - fn slide_within_blocks(self, rhs: impl SimdInto) -> Self { - self.simd - .slide_within_blocks_mask64x8::(self, rhs.simd_into(self.simd)) + let slice: &mut [i64; 8] = slice.try_into().unwrap(); + *slice = self.simd.as_array_mask64x8(*self); } -} -impl crate::SimdMask for mask64x8 { #[inline(always)] - fn simd_eq(self, rhs: impl SimdInto) -> Self::Mask { + fn simd_eq(self, rhs: impl SimdInto) -> Self { self.simd.simd_eq_mask64x8(self, rhs.simd_into(self.simd)) } #[inline(always)] @@ -6887,10 +5914,3 @@ impl crate::SimdMask for mask64x8 { self.simd.all_false_mask64x8(self) } } -impl crate::SimdSplit for mask64x8 { - type Split = mask64x4; - #[inline(always)] - fn split(self) -> (Self::Split, Self::Split) { - self.simd.split_mask64x8(self) - } -} diff --git a/fearless_simd/src/generated/sse4_2.rs b/fearless_simd/src/generated/sse4_2.rs index 98dcf2fc..6388d315 100644 --- a/fearless_simd/src/generated/sse4_2.rs +++ b/fearless_simd/src/generated/sse4_2.rs @@ -917,8 +917,11 @@ impl Simd for Sse4_2 { __m128i::from(a).simd_into(self) } #[inline(always)] - fn splat_mask8x16(self, val: i8) -> mask8x16 { - unsafe { _mm_set1_epi8(val).simd_into(self) } + fn splat_mask8x16(self, val: bool) -> mask8x16 { + unsafe { + let val: i8 = if val { !0 } else { 0 }; + _mm_set1_epi8(val).simd_into(self) + } } #[inline(always)] fn load_array_mask8x16(self, val: [i8; 16usize]) -> mask8x16 { @@ -928,82 +931,10 @@ impl Simd for Sse4_2 { } } #[inline(always)] - fn load_array_ref_mask8x16(self, val: &[i8; 16usize]) -> mask8x16 { - mask8x16 { - val: unsafe { core::mem::transmute_copy(val) }, - simd: self, - } - } - #[inline(always)] fn as_array_mask8x16(self, a: mask8x16) -> [i8; 16usize] { unsafe { core::mem::transmute::<__m128i, [i8; 16usize]>(a.val.0) } } #[inline(always)] - fn as_array_ref_mask8x16(self, a: &mask8x16) -> &[i8; 16usize] { - unsafe { core::mem::transmute::<&__m128i, &[i8; 16usize]>(&a.val.0) } - } - #[inline(always)] - fn as_array_mut_mask8x16(self, a: &mut mask8x16) -> &mut [i8; 16usize] { - unsafe { core::mem::transmute::<&mut __m128i, &mut [i8; 16usize]>(&mut a.val.0) } - } - #[inline(always)] - fn store_array_mask8x16(self, a: mask8x16, dest: &mut [i8; 16usize]) -> () { - unsafe { - core::ptr::copy_nonoverlapping( - (&raw const a.val.0) as *const i8, - dest.as_mut_ptr(), - 16usize, - ); - } - } - #[inline(always)] - fn cvt_from_bytes_mask8x16(self, a: u8x16) -> mask8x16 { - unsafe { - mask8x16 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask8x16(self, a: mask8x16) -> u8x16 { - unsafe { - u8x16 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask8x16( - self, - a: mask8x16, - b: mask8x16, - ) -> mask8x16 { - unsafe { - if SHIFT >= 16usize { - return b; - } - let result = dyn_alignr_128( - self.cvt_to_bytes_mask8x16(b).val.0, - self.cvt_to_bytes_mask8x16(a).val.0, - SHIFT, - ); - self.cvt_from_bytes_mask8x16(u8x16 { - val: crate::support::Aligned128(result), - simd: self, - }) - } - } - #[inline(always)] - fn slide_within_blocks_mask8x16( - self, - a: mask8x16, - b: mask8x16, - ) -> mask8x16 { - self.slide_mask8x16::(a, b) - } - #[inline(always)] fn and_mask8x16(self, a: mask8x16, b: mask8x16) -> mask8x16 { unsafe { _mm_and_si128(a.into(), b.into()).simd_into(self) } } @@ -1017,7 +948,7 @@ impl Simd for Sse4_2 { } #[inline(always)] fn not_mask8x16(self, a: mask8x16) -> mask8x16 { - a ^ !0 + self.xor_mask8x16(a, self.splat_mask8x16(true)) } #[inline(always)] fn select_mask8x16( @@ -1486,8 +1417,11 @@ impl Simd for Sse4_2 { __m128i::from(a).simd_into(self) } #[inline(always)] - fn splat_mask16x8(self, val: i16) -> mask16x8 { - unsafe { _mm_set1_epi16(val).simd_into(self) } + fn splat_mask16x8(self, val: bool) -> mask16x8 { + unsafe { + let val: i16 = if val { !0 } else { 0 }; + _mm_set1_epi16(val).simd_into(self) + } } #[inline(always)] fn load_array_mask16x8(self, val: [i16; 8usize]) -> mask16x8 { @@ -1497,82 +1431,10 @@ impl Simd for Sse4_2 { } } #[inline(always)] - fn load_array_ref_mask16x8(self, val: &[i16; 8usize]) -> mask16x8 { - mask16x8 { - val: unsafe { core::mem::transmute_copy(val) }, - simd: self, - } - } - #[inline(always)] fn as_array_mask16x8(self, a: mask16x8) -> [i16; 8usize] { unsafe { core::mem::transmute::<__m128i, [i16; 8usize]>(a.val.0) } } #[inline(always)] - fn as_array_ref_mask16x8(self, a: &mask16x8) -> &[i16; 8usize] { - unsafe { core::mem::transmute::<&__m128i, &[i16; 8usize]>(&a.val.0) } - } - #[inline(always)] - fn as_array_mut_mask16x8(self, a: &mut mask16x8) -> &mut [i16; 8usize] { - unsafe { core::mem::transmute::<&mut __m128i, &mut [i16; 8usize]>(&mut a.val.0) } - } - #[inline(always)] - fn store_array_mask16x8(self, a: mask16x8, dest: &mut [i16; 8usize]) -> () { - unsafe { - core::ptr::copy_nonoverlapping( - (&raw const a.val.0) as *const i16, - dest.as_mut_ptr(), - 8usize, - ); - } - } - #[inline(always)] - fn cvt_from_bytes_mask16x8(self, a: u8x16) -> mask16x8 { - unsafe { - mask16x8 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask16x8(self, a: mask16x8) -> u8x16 { - unsafe { - u8x16 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask16x8( - self, - a: mask16x8, - b: mask16x8, - ) -> mask16x8 { - unsafe { - if SHIFT >= 8usize { - return b; - } - let result = dyn_alignr_128( - self.cvt_to_bytes_mask16x8(b).val.0, - self.cvt_to_bytes_mask16x8(a).val.0, - SHIFT * 2usize, - ); - self.cvt_from_bytes_mask16x8(u8x16 { - val: crate::support::Aligned128(result), - simd: self, - }) - } - } - #[inline(always)] - fn slide_within_blocks_mask16x8( - self, - a: mask16x8, - b: mask16x8, - ) -> mask16x8 { - self.slide_mask16x8::(a, b) - } - #[inline(always)] fn and_mask16x8(self, a: mask16x8, b: mask16x8) -> mask16x8 { unsafe { _mm_and_si128(a.into(), b.into()).simd_into(self) } } @@ -1586,7 +1448,7 @@ impl Simd for Sse4_2 { } #[inline(always)] fn not_mask16x8(self, a: mask16x8) -> mask16x8 { - a ^ !0 + self.xor_mask16x8(a, self.splat_mask16x8(true)) } #[inline(always)] fn select_mask16x8( @@ -2065,8 +1927,11 @@ impl Simd for Sse4_2 { } } #[inline(always)] - fn splat_mask32x4(self, val: i32) -> mask32x4 { - unsafe { _mm_set1_epi32(val).simd_into(self) } + fn splat_mask32x4(self, val: bool) -> mask32x4 { + unsafe { + let val: i32 = if val { !0 } else { 0 }; + _mm_set1_epi32(val).simd_into(self) + } } #[inline(always)] fn load_array_mask32x4(self, val: [i32; 4usize]) -> mask32x4 { @@ -2076,82 +1941,10 @@ impl Simd for Sse4_2 { } } #[inline(always)] - fn load_array_ref_mask32x4(self, val: &[i32; 4usize]) -> mask32x4 { - mask32x4 { - val: unsafe { core::mem::transmute_copy(val) }, - simd: self, - } - } - #[inline(always)] fn as_array_mask32x4(self, a: mask32x4) -> [i32; 4usize] { unsafe { core::mem::transmute::<__m128i, [i32; 4usize]>(a.val.0) } } #[inline(always)] - fn as_array_ref_mask32x4(self, a: &mask32x4) -> &[i32; 4usize] { - unsafe { core::mem::transmute::<&__m128i, &[i32; 4usize]>(&a.val.0) } - } - #[inline(always)] - fn as_array_mut_mask32x4(self, a: &mut mask32x4) -> &mut [i32; 4usize] { - unsafe { core::mem::transmute::<&mut __m128i, &mut [i32; 4usize]>(&mut a.val.0) } - } - #[inline(always)] - fn store_array_mask32x4(self, a: mask32x4, dest: &mut [i32; 4usize]) -> () { - unsafe { - core::ptr::copy_nonoverlapping( - (&raw const a.val.0) as *const i32, - dest.as_mut_ptr(), - 4usize, - ); - } - } - #[inline(always)] - fn cvt_from_bytes_mask32x4(self, a: u8x16) -> mask32x4 { - unsafe { - mask32x4 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask32x4(self, a: mask32x4) -> u8x16 { - unsafe { - u8x16 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask32x4( - self, - a: mask32x4, - b: mask32x4, - ) -> mask32x4 { - unsafe { - if SHIFT >= 4usize { - return b; - } - let result = dyn_alignr_128( - self.cvt_to_bytes_mask32x4(b).val.0, - self.cvt_to_bytes_mask32x4(a).val.0, - SHIFT * 4usize, - ); - self.cvt_from_bytes_mask32x4(u8x16 { - val: crate::support::Aligned128(result), - simd: self, - }) - } - } - #[inline(always)] - fn slide_within_blocks_mask32x4( - self, - a: mask32x4, - b: mask32x4, - ) -> mask32x4 { - self.slide_mask32x4::(a, b) - } - #[inline(always)] fn and_mask32x4(self, a: mask32x4, b: mask32x4) -> mask32x4 { unsafe { _mm_and_si128(a.into(), b.into()).simd_into(self) } } @@ -2165,7 +1958,7 @@ impl Simd for Sse4_2 { } #[inline(always)] fn not_mask32x4(self, a: mask32x4) -> mask32x4 { - a ^ !0 + self.xor_mask32x4(a, self.splat_mask32x4(true)) } #[inline(always)] fn select_mask32x4( @@ -2442,8 +2235,11 @@ impl Simd for Sse4_2 { unsafe { _mm_castpd_ps(a.into()).simd_into(self) } } #[inline(always)] - fn splat_mask64x2(self, val: i64) -> mask64x2 { - unsafe { _mm_set1_epi64x(val).simd_into(self) } + fn splat_mask64x2(self, val: bool) -> mask64x2 { + unsafe { + let val: i64 = if val { !0 } else { 0 }; + _mm_set1_epi64x(val).simd_into(self) + } } #[inline(always)] fn load_array_mask64x2(self, val: [i64; 2usize]) -> mask64x2 { @@ -2453,82 +2249,10 @@ impl Simd for Sse4_2 { } } #[inline(always)] - fn load_array_ref_mask64x2(self, val: &[i64; 2usize]) -> mask64x2 { - mask64x2 { - val: unsafe { core::mem::transmute_copy(val) }, - simd: self, - } - } - #[inline(always)] fn as_array_mask64x2(self, a: mask64x2) -> [i64; 2usize] { unsafe { core::mem::transmute::<__m128i, [i64; 2usize]>(a.val.0) } } #[inline(always)] - fn as_array_ref_mask64x2(self, a: &mask64x2) -> &[i64; 2usize] { - unsafe { core::mem::transmute::<&__m128i, &[i64; 2usize]>(&a.val.0) } - } - #[inline(always)] - fn as_array_mut_mask64x2(self, a: &mut mask64x2) -> &mut [i64; 2usize] { - unsafe { core::mem::transmute::<&mut __m128i, &mut [i64; 2usize]>(&mut a.val.0) } - } - #[inline(always)] - fn store_array_mask64x2(self, a: mask64x2, dest: &mut [i64; 2usize]) -> () { - unsafe { - core::ptr::copy_nonoverlapping( - (&raw const a.val.0) as *const i64, - dest.as_mut_ptr(), - 2usize, - ); - } - } - #[inline(always)] - fn cvt_from_bytes_mask64x2(self, a: u8x16) -> mask64x2 { - unsafe { - mask64x2 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask64x2(self, a: mask64x2) -> u8x16 { - unsafe { - u8x16 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask64x2( - self, - a: mask64x2, - b: mask64x2, - ) -> mask64x2 { - unsafe { - if SHIFT >= 2usize { - return b; - } - let result = dyn_alignr_128( - self.cvt_to_bytes_mask64x2(b).val.0, - self.cvt_to_bytes_mask64x2(a).val.0, - SHIFT * 8usize, - ); - self.cvt_from_bytes_mask64x2(u8x16 { - val: crate::support::Aligned128(result), - simd: self, - }) - } - } - #[inline(always)] - fn slide_within_blocks_mask64x2( - self, - a: mask64x2, - b: mask64x2, - ) -> mask64x2 { - self.slide_mask64x2::(a, b) - } - #[inline(always)] fn and_mask64x2(self, a: mask64x2, b: mask64x2) -> mask64x2 { unsafe { _mm_and_si128(a.into(), b.into()).simd_into(self) } } @@ -2542,7 +2266,7 @@ impl Simd for Sse4_2 { } #[inline(always)] fn not_mask64x2(self, a: mask64x2) -> mask64x2 { - a ^ !0 + self.xor_mask64x2(a, self.splat_mask64x2(true)) } #[inline(always)] fn select_mask64x2( @@ -3528,7 +3252,7 @@ impl Simd for Sse4_2 { ) } #[inline(always)] - fn splat_mask8x32(self, val: i8) -> mask8x32 { + fn splat_mask8x32(self, val: bool) -> mask8x32 { let half = self.splat_mask8x16(val); self.combine_mask8x16(half, half) } @@ -3540,87 +3264,10 @@ impl Simd for Sse4_2 { } } #[inline(always)] - fn load_array_ref_mask8x32(self, val: &[i8; 32usize]) -> mask8x32 { - mask8x32 { - val: unsafe { core::mem::transmute_copy(val) }, - simd: self, - } - } - #[inline(always)] fn as_array_mask8x32(self, a: mask8x32) -> [i8; 32usize] { unsafe { core::mem::transmute::<[__m128i; 2usize], [i8; 32usize]>(a.val.0) } } #[inline(always)] - fn as_array_ref_mask8x32(self, a: &mask8x32) -> &[i8; 32usize] { - unsafe { core::mem::transmute::<&[__m128i; 2usize], &[i8; 32usize]>(&a.val.0) } - } - #[inline(always)] - fn as_array_mut_mask8x32(self, a: &mut mask8x32) -> &mut [i8; 32usize] { - unsafe { core::mem::transmute::<&mut [__m128i; 2usize], &mut [i8; 32usize]>(&mut a.val.0) } - } - #[inline(always)] - fn store_array_mask8x32(self, a: mask8x32, dest: &mut [i8; 32usize]) -> () { - unsafe { - core::ptr::copy_nonoverlapping( - (&raw const a.val.0) as *const i8, - dest.as_mut_ptr(), - 32usize, - ); - } - } - #[inline(always)] - fn cvt_from_bytes_mask8x32(self, a: u8x32) -> mask8x32 { - unsafe { - mask8x32 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask8x32(self, a: mask8x32) -> u8x32 { - unsafe { - u8x32 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask8x32( - self, - a: mask8x32, - b: mask8x32, - ) -> mask8x32 { - unsafe { - if SHIFT >= 32usize { - return b; - } - let result = cross_block_alignr_128x2( - self.cvt_to_bytes_mask8x32(b).val.0, - self.cvt_to_bytes_mask8x32(a).val.0, - SHIFT, - ); - self.cvt_from_bytes_mask8x32(u8x32 { - val: crate::support::Aligned256(result), - simd: self, - }) - } - } - #[inline(always)] - fn slide_within_blocks_mask8x32( - self, - a: mask8x32, - b: mask8x32, - ) -> mask8x32 { - let (a0, a1) = self.split_mask8x32(a); - let (b0, b1) = self.split_mask8x32(b); - self.combine_mask8x16( - self.slide_within_blocks_mask8x16::(a0, b0), - self.slide_within_blocks_mask8x16::(a1, b1), - ) - } - #[inline(always)] fn and_mask8x32(self, a: mask8x32, b: mask8x32) -> mask8x32 { let (a0, a1) = self.split_mask8x32(a); let (b0, b1) = self.split_mask8x32(b); @@ -4289,7 +3936,7 @@ impl Simd for Sse4_2 { ) } #[inline(always)] - fn splat_mask16x16(self, val: i16) -> mask16x16 { + fn splat_mask16x16(self, val: bool) -> mask16x16 { let half = self.splat_mask16x8(val); self.combine_mask16x8(half, half) } @@ -4301,87 +3948,10 @@ impl Simd for Sse4_2 { } } #[inline(always)] - fn load_array_ref_mask16x16(self, val: &[i16; 16usize]) -> mask16x16 { - mask16x16 { - val: unsafe { core::mem::transmute_copy(val) }, - simd: self, - } - } - #[inline(always)] fn as_array_mask16x16(self, a: mask16x16) -> [i16; 16usize] { unsafe { core::mem::transmute::<[__m128i; 2usize], [i16; 16usize]>(a.val.0) } } #[inline(always)] - fn as_array_ref_mask16x16(self, a: &mask16x16) -> &[i16; 16usize] { - unsafe { core::mem::transmute::<&[__m128i; 2usize], &[i16; 16usize]>(&a.val.0) } - } - #[inline(always)] - fn as_array_mut_mask16x16(self, a: &mut mask16x16) -> &mut [i16; 16usize] { - unsafe { core::mem::transmute::<&mut [__m128i; 2usize], &mut [i16; 16usize]>(&mut a.val.0) } - } - #[inline(always)] - fn store_array_mask16x16(self, a: mask16x16, dest: &mut [i16; 16usize]) -> () { - unsafe { - core::ptr::copy_nonoverlapping( - (&raw const a.val.0) as *const i16, - dest.as_mut_ptr(), - 16usize, - ); - } - } - #[inline(always)] - fn cvt_from_bytes_mask16x16(self, a: u8x32) -> mask16x16 { - unsafe { - mask16x16 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask16x16(self, a: mask16x16) -> u8x32 { - unsafe { - u8x32 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask16x16( - self, - a: mask16x16, - b: mask16x16, - ) -> mask16x16 { - unsafe { - if SHIFT >= 16usize { - return b; - } - let result = cross_block_alignr_128x2( - self.cvt_to_bytes_mask16x16(b).val.0, - self.cvt_to_bytes_mask16x16(a).val.0, - SHIFT * 2usize, - ); - self.cvt_from_bytes_mask16x16(u8x32 { - val: crate::support::Aligned256(result), - simd: self, - }) - } - } - #[inline(always)] - fn slide_within_blocks_mask16x16( - self, - a: mask16x16, - b: mask16x16, - ) -> mask16x16 { - let (a0, a1) = self.split_mask16x16(a); - let (b0, b1) = self.split_mask16x16(b); - self.combine_mask16x8( - self.slide_within_blocks_mask16x8::(a0, b0), - self.slide_within_blocks_mask16x8::(a1, b1), - ) - } - #[inline(always)] fn and_mask16x16(self, a: mask16x16, b: mask16x16) -> mask16x16 { let (a0, a1) = self.split_mask16x16(a); let (b0, b1) = self.split_mask16x16(b); @@ -5041,7 +4611,7 @@ impl Simd for Sse4_2 { self.combine_f32x4(self.cvt_f32_u32x4(a0), self.cvt_f32_u32x4(a1)) } #[inline(always)] - fn splat_mask32x8(self, val: i32) -> mask32x8 { + fn splat_mask32x8(self, val: bool) -> mask32x8 { let half = self.splat_mask32x4(val); self.combine_mask32x4(half, half) } @@ -5053,87 +4623,10 @@ impl Simd for Sse4_2 { } } #[inline(always)] - fn load_array_ref_mask32x8(self, val: &[i32; 8usize]) -> mask32x8 { - mask32x8 { - val: unsafe { core::mem::transmute_copy(val) }, - simd: self, - } - } - #[inline(always)] fn as_array_mask32x8(self, a: mask32x8) -> [i32; 8usize] { unsafe { core::mem::transmute::<[__m128i; 2usize], [i32; 8usize]>(a.val.0) } } #[inline(always)] - fn as_array_ref_mask32x8(self, a: &mask32x8) -> &[i32; 8usize] { - unsafe { core::mem::transmute::<&[__m128i; 2usize], &[i32; 8usize]>(&a.val.0) } - } - #[inline(always)] - fn as_array_mut_mask32x8(self, a: &mut mask32x8) -> &mut [i32; 8usize] { - unsafe { core::mem::transmute::<&mut [__m128i; 2usize], &mut [i32; 8usize]>(&mut a.val.0) } - } - #[inline(always)] - fn store_array_mask32x8(self, a: mask32x8, dest: &mut [i32; 8usize]) -> () { - unsafe { - core::ptr::copy_nonoverlapping( - (&raw const a.val.0) as *const i32, - dest.as_mut_ptr(), - 8usize, - ); - } - } - #[inline(always)] - fn cvt_from_bytes_mask32x8(self, a: u8x32) -> mask32x8 { - unsafe { - mask32x8 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask32x8(self, a: mask32x8) -> u8x32 { - unsafe { - u8x32 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask32x8( - self, - a: mask32x8, - b: mask32x8, - ) -> mask32x8 { - unsafe { - if SHIFT >= 8usize { - return b; - } - let result = cross_block_alignr_128x2( - self.cvt_to_bytes_mask32x8(b).val.0, - self.cvt_to_bytes_mask32x8(a).val.0, - SHIFT * 4usize, - ); - self.cvt_from_bytes_mask32x8(u8x32 { - val: crate::support::Aligned256(result), - simd: self, - }) - } - } - #[inline(always)] - fn slide_within_blocks_mask32x8( - self, - a: mask32x8, - b: mask32x8, - ) -> mask32x8 { - let (a0, a1) = self.split_mask32x8(a); - let (b0, b1) = self.split_mask32x8(b); - self.combine_mask32x4( - self.slide_within_blocks_mask32x4::(a0, b0), - self.slide_within_blocks_mask32x4::(a1, b1), - ) - } - #[inline(always)] fn and_mask32x8(self, a: mask32x8, b: mask32x8) -> mask32x8 { let (a0, a1) = self.split_mask32x8(a); let (b0, b1) = self.split_mask32x8(b); @@ -5545,97 +5038,20 @@ impl Simd for Sse4_2 { ) } #[inline(always)] - fn splat_mask64x4(self, val: i64) -> mask64x4 { + fn splat_mask64x4(self, val: bool) -> mask64x4 { let half = self.splat_mask64x2(val); self.combine_mask64x2(half, half) } #[inline(always)] fn load_array_mask64x4(self, val: [i64; 4usize]) -> mask64x4 { mask64x4 { - val: unsafe { core::mem::transmute_copy(&val) }, - simd: self, - } - } - #[inline(always)] - fn load_array_ref_mask64x4(self, val: &[i64; 4usize]) -> mask64x4 { - mask64x4 { - val: unsafe { core::mem::transmute_copy(val) }, - simd: self, - } - } - #[inline(always)] - fn as_array_mask64x4(self, a: mask64x4) -> [i64; 4usize] { - unsafe { core::mem::transmute::<[__m128i; 2usize], [i64; 4usize]>(a.val.0) } - } - #[inline(always)] - fn as_array_ref_mask64x4(self, a: &mask64x4) -> &[i64; 4usize] { - unsafe { core::mem::transmute::<&[__m128i; 2usize], &[i64; 4usize]>(&a.val.0) } - } - #[inline(always)] - fn as_array_mut_mask64x4(self, a: &mut mask64x4) -> &mut [i64; 4usize] { - unsafe { core::mem::transmute::<&mut [__m128i; 2usize], &mut [i64; 4usize]>(&mut a.val.0) } - } - #[inline(always)] - fn store_array_mask64x4(self, a: mask64x4, dest: &mut [i64; 4usize]) -> () { - unsafe { - core::ptr::copy_nonoverlapping( - (&raw const a.val.0) as *const i64, - dest.as_mut_ptr(), - 4usize, - ); - } - } - #[inline(always)] - fn cvt_from_bytes_mask64x4(self, a: u8x32) -> mask64x4 { - unsafe { - mask64x4 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask64x4(self, a: mask64x4) -> u8x32 { - unsafe { - u8x32 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask64x4( - self, - a: mask64x4, - b: mask64x4, - ) -> mask64x4 { - unsafe { - if SHIFT >= 4usize { - return b; - } - let result = cross_block_alignr_128x2( - self.cvt_to_bytes_mask64x4(b).val.0, - self.cvt_to_bytes_mask64x4(a).val.0, - SHIFT * 8usize, - ); - self.cvt_from_bytes_mask64x4(u8x32 { - val: crate::support::Aligned256(result), - simd: self, - }) - } - } - #[inline(always)] - fn slide_within_blocks_mask64x4( - self, - a: mask64x4, - b: mask64x4, - ) -> mask64x4 { - let (a0, a1) = self.split_mask64x4(a); - let (b0, b1) = self.split_mask64x4(b); - self.combine_mask64x2( - self.slide_within_blocks_mask64x2::(a0, b0), - self.slide_within_blocks_mask64x2::(a1, b1), - ) + val: unsafe { core::mem::transmute_copy(&val) }, + simd: self, + } + } + #[inline(always)] + fn as_array_mask64x4(self, a: mask64x4) -> [i64; 4usize] { + unsafe { core::mem::transmute::<[__m128i; 2usize], [i64; 4usize]>(a.val.0) } } #[inline(always)] fn and_mask64x4(self, a: mask64x4, b: mask64x4) -> mask64x4 { @@ -6743,7 +6159,7 @@ impl Simd for Sse4_2 { ) } #[inline(always)] - fn splat_mask8x64(self, val: i8) -> mask8x64 { + fn splat_mask8x64(self, val: bool) -> mask8x64 { let half = self.splat_mask8x32(val); self.combine_mask8x32(half, half) } @@ -6755,87 +6171,10 @@ impl Simd for Sse4_2 { } } #[inline(always)] - fn load_array_ref_mask8x64(self, val: &[i8; 64usize]) -> mask8x64 { - mask8x64 { - val: unsafe { core::mem::transmute_copy(val) }, - simd: self, - } - } - #[inline(always)] fn as_array_mask8x64(self, a: mask8x64) -> [i8; 64usize] { unsafe { core::mem::transmute::<[__m128i; 4usize], [i8; 64usize]>(a.val.0) } } #[inline(always)] - fn as_array_ref_mask8x64(self, a: &mask8x64) -> &[i8; 64usize] { - unsafe { core::mem::transmute::<&[__m128i; 4usize], &[i8; 64usize]>(&a.val.0) } - } - #[inline(always)] - fn as_array_mut_mask8x64(self, a: &mut mask8x64) -> &mut [i8; 64usize] { - unsafe { core::mem::transmute::<&mut [__m128i; 4usize], &mut [i8; 64usize]>(&mut a.val.0) } - } - #[inline(always)] - fn store_array_mask8x64(self, a: mask8x64, dest: &mut [i8; 64usize]) -> () { - unsafe { - core::ptr::copy_nonoverlapping( - (&raw const a.val.0) as *const i8, - dest.as_mut_ptr(), - 64usize, - ); - } - } - #[inline(always)] - fn cvt_from_bytes_mask8x64(self, a: u8x64) -> mask8x64 { - unsafe { - mask8x64 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask8x64(self, a: mask8x64) -> u8x64 { - unsafe { - u8x64 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask8x64( - self, - a: mask8x64, - b: mask8x64, - ) -> mask8x64 { - unsafe { - if SHIFT >= 64usize { - return b; - } - let result = cross_block_alignr_128x4( - self.cvt_to_bytes_mask8x64(b).val.0, - self.cvt_to_bytes_mask8x64(a).val.0, - SHIFT, - ); - self.cvt_from_bytes_mask8x64(u8x64 { - val: crate::support::Aligned512(result), - simd: self, - }) - } - } - #[inline(always)] - fn slide_within_blocks_mask8x64( - self, - a: mask8x64, - b: mask8x64, - ) -> mask8x64 { - let (a0, a1) = self.split_mask8x64(a); - let (b0, b1) = self.split_mask8x64(b); - self.combine_mask8x32( - self.slide_within_blocks_mask8x32::(a0, b0), - self.slide_within_blocks_mask8x32::(a1, b1), - ) - } - #[inline(always)] fn and_mask8x64(self, a: mask8x64, b: mask8x64) -> mask8x64 { let (a0, a1) = self.split_mask8x64(a); let (b0, b1) = self.split_mask8x64(b); @@ -7550,7 +6889,7 @@ impl Simd for Sse4_2 { ) } #[inline(always)] - fn splat_mask16x32(self, val: i16) -> mask16x32 { + fn splat_mask16x32(self, val: bool) -> mask16x32 { let half = self.splat_mask16x16(val); self.combine_mask16x16(half, half) } @@ -7562,87 +6901,10 @@ impl Simd for Sse4_2 { } } #[inline(always)] - fn load_array_ref_mask16x32(self, val: &[i16; 32usize]) -> mask16x32 { - mask16x32 { - val: unsafe { core::mem::transmute_copy(val) }, - simd: self, - } - } - #[inline(always)] fn as_array_mask16x32(self, a: mask16x32) -> [i16; 32usize] { unsafe { core::mem::transmute::<[__m128i; 4usize], [i16; 32usize]>(a.val.0) } } #[inline(always)] - fn as_array_ref_mask16x32(self, a: &mask16x32) -> &[i16; 32usize] { - unsafe { core::mem::transmute::<&[__m128i; 4usize], &[i16; 32usize]>(&a.val.0) } - } - #[inline(always)] - fn as_array_mut_mask16x32(self, a: &mut mask16x32) -> &mut [i16; 32usize] { - unsafe { core::mem::transmute::<&mut [__m128i; 4usize], &mut [i16; 32usize]>(&mut a.val.0) } - } - #[inline(always)] - fn store_array_mask16x32(self, a: mask16x32, dest: &mut [i16; 32usize]) -> () { - unsafe { - core::ptr::copy_nonoverlapping( - (&raw const a.val.0) as *const i16, - dest.as_mut_ptr(), - 32usize, - ); - } - } - #[inline(always)] - fn cvt_from_bytes_mask16x32(self, a: u8x64) -> mask16x32 { - unsafe { - mask16x32 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask16x32(self, a: mask16x32) -> u8x64 { - unsafe { - u8x64 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask16x32( - self, - a: mask16x32, - b: mask16x32, - ) -> mask16x32 { - unsafe { - if SHIFT >= 32usize { - return b; - } - let result = cross_block_alignr_128x4( - self.cvt_to_bytes_mask16x32(b).val.0, - self.cvt_to_bytes_mask16x32(a).val.0, - SHIFT * 2usize, - ); - self.cvt_from_bytes_mask16x32(u8x64 { - val: crate::support::Aligned512(result), - simd: self, - }) - } - } - #[inline(always)] - fn slide_within_blocks_mask16x32( - self, - a: mask16x32, - b: mask16x32, - ) -> mask16x32 { - let (a0, a1) = self.split_mask16x32(a); - let (b0, b1) = self.split_mask16x32(b); - self.combine_mask16x16( - self.slide_within_blocks_mask16x16::(a0, b0), - self.slide_within_blocks_mask16x16::(a1, b1), - ) - } - #[inline(always)] fn and_mask16x32(self, a: mask16x32, b: mask16x32) -> mask16x32 { let (a0, a1) = self.split_mask16x32(a); let (b0, b1) = self.split_mask16x32(b); @@ -8329,7 +7591,7 @@ impl Simd for Sse4_2 { self.combine_f32x8(self.cvt_f32_u32x8(a0), self.cvt_f32_u32x8(a1)) } #[inline(always)] - fn splat_mask32x16(self, val: i32) -> mask32x16 { + fn splat_mask32x16(self, val: bool) -> mask32x16 { let half = self.splat_mask32x8(val); self.combine_mask32x8(half, half) } @@ -8341,87 +7603,10 @@ impl Simd for Sse4_2 { } } #[inline(always)] - fn load_array_ref_mask32x16(self, val: &[i32; 16usize]) -> mask32x16 { - mask32x16 { - val: unsafe { core::mem::transmute_copy(val) }, - simd: self, - } - } - #[inline(always)] fn as_array_mask32x16(self, a: mask32x16) -> [i32; 16usize] { unsafe { core::mem::transmute::<[__m128i; 4usize], [i32; 16usize]>(a.val.0) } } #[inline(always)] - fn as_array_ref_mask32x16(self, a: &mask32x16) -> &[i32; 16usize] { - unsafe { core::mem::transmute::<&[__m128i; 4usize], &[i32; 16usize]>(&a.val.0) } - } - #[inline(always)] - fn as_array_mut_mask32x16(self, a: &mut mask32x16) -> &mut [i32; 16usize] { - unsafe { core::mem::transmute::<&mut [__m128i; 4usize], &mut [i32; 16usize]>(&mut a.val.0) } - } - #[inline(always)] - fn store_array_mask32x16(self, a: mask32x16, dest: &mut [i32; 16usize]) -> () { - unsafe { - core::ptr::copy_nonoverlapping( - (&raw const a.val.0) as *const i32, - dest.as_mut_ptr(), - 16usize, - ); - } - } - #[inline(always)] - fn cvt_from_bytes_mask32x16(self, a: u8x64) -> mask32x16 { - unsafe { - mask32x16 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask32x16(self, a: mask32x16) -> u8x64 { - unsafe { - u8x64 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask32x16( - self, - a: mask32x16, - b: mask32x16, - ) -> mask32x16 { - unsafe { - if SHIFT >= 16usize { - return b; - } - let result = cross_block_alignr_128x4( - self.cvt_to_bytes_mask32x16(b).val.0, - self.cvt_to_bytes_mask32x16(a).val.0, - SHIFT * 4usize, - ); - self.cvt_from_bytes_mask32x16(u8x64 { - val: crate::support::Aligned512(result), - simd: self, - }) - } - } - #[inline(always)] - fn slide_within_blocks_mask32x16( - self, - a: mask32x16, - b: mask32x16, - ) -> mask32x16 { - let (a0, a1) = self.split_mask32x16(a); - let (b0, b1) = self.split_mask32x16(b); - self.combine_mask32x8( - self.slide_within_blocks_mask32x8::(a0, b0), - self.slide_within_blocks_mask32x8::(a1, b1), - ) - } - #[inline(always)] fn and_mask32x16(self, a: mask32x16, b: mask32x16) -> mask32x16 { let (a0, a1) = self.split_mask32x16(a); let (b0, b1) = self.split_mask32x16(b); @@ -8819,7 +8004,7 @@ impl Simd for Sse4_2 { ) } #[inline(always)] - fn splat_mask64x8(self, val: i64) -> mask64x8 { + fn splat_mask64x8(self, val: bool) -> mask64x8 { let half = self.splat_mask64x4(val); self.combine_mask64x4(half, half) } @@ -8831,87 +8016,10 @@ impl Simd for Sse4_2 { } } #[inline(always)] - fn load_array_ref_mask64x8(self, val: &[i64; 8usize]) -> mask64x8 { - mask64x8 { - val: unsafe { core::mem::transmute_copy(val) }, - simd: self, - } - } - #[inline(always)] fn as_array_mask64x8(self, a: mask64x8) -> [i64; 8usize] { unsafe { core::mem::transmute::<[__m128i; 4usize], [i64; 8usize]>(a.val.0) } } #[inline(always)] - fn as_array_ref_mask64x8(self, a: &mask64x8) -> &[i64; 8usize] { - unsafe { core::mem::transmute::<&[__m128i; 4usize], &[i64; 8usize]>(&a.val.0) } - } - #[inline(always)] - fn as_array_mut_mask64x8(self, a: &mut mask64x8) -> &mut [i64; 8usize] { - unsafe { core::mem::transmute::<&mut [__m128i; 4usize], &mut [i64; 8usize]>(&mut a.val.0) } - } - #[inline(always)] - fn store_array_mask64x8(self, a: mask64x8, dest: &mut [i64; 8usize]) -> () { - unsafe { - core::ptr::copy_nonoverlapping( - (&raw const a.val.0) as *const i64, - dest.as_mut_ptr(), - 8usize, - ); - } - } - #[inline(always)] - fn cvt_from_bytes_mask64x8(self, a: u8x64) -> mask64x8 { - unsafe { - mask64x8 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask64x8(self, a: mask64x8) -> u8x64 { - unsafe { - u8x64 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask64x8( - self, - a: mask64x8, - b: mask64x8, - ) -> mask64x8 { - unsafe { - if SHIFT >= 8usize { - return b; - } - let result = cross_block_alignr_128x4( - self.cvt_to_bytes_mask64x8(b).val.0, - self.cvt_to_bytes_mask64x8(a).val.0, - SHIFT * 8usize, - ); - self.cvt_from_bytes_mask64x8(u8x64 { - val: crate::support::Aligned512(result), - simd: self, - }) - } - } - #[inline(always)] - fn slide_within_blocks_mask64x8( - self, - a: mask64x8, - b: mask64x8, - ) -> mask64x8 { - let (a0, a1) = self.split_mask64x8(a); - let (b0, b1) = self.split_mask64x8(b); - self.combine_mask64x4( - self.slide_within_blocks_mask64x4::(a0, b0), - self.slide_within_blocks_mask64x4::(a1, b1), - ) - } - #[inline(always)] fn and_mask64x8(self, a: mask64x8, b: mask64x8) -> mask64x8 { let (a0, a1) = self.split_mask64x8(a); let (b0, b1) = self.split_mask64x8(b); diff --git a/fearless_simd/src/generated/wasm.rs b/fearless_simd/src/generated/wasm.rs index ce07f344..4eb7671b 100644 --- a/fearless_simd/src/generated/wasm.rs +++ b/fearless_simd/src/generated/wasm.rs @@ -836,7 +836,8 @@ impl Simd for WasmSimd128 { ::from(a).simd_into(self) } #[inline(always)] - fn splat_mask8x16(self, val: i8) -> mask8x16 { + fn splat_mask8x16(self, val: bool) -> mask8x16 { + let val: i8 = if val { !0 } else { 0 }; i8x16_splat(val).simd_into(self) } #[inline(always)] @@ -847,82 +848,10 @@ impl Simd for WasmSimd128 { } } #[inline(always)] - fn load_array_ref_mask8x16(self, val: &[i8; 16usize]) -> mask8x16 { - mask8x16 { - val: unsafe { core::mem::transmute_copy(val) }, - simd: self, - } - } - #[inline(always)] fn as_array_mask8x16(self, a: mask8x16) -> [i8; 16usize] { unsafe { core::mem::transmute::(a.val.0) } } #[inline(always)] - fn as_array_ref_mask8x16(self, a: &mask8x16) -> &[i8; 16usize] { - unsafe { core::mem::transmute::<&v128, &[i8; 16usize]>(&a.val.0) } - } - #[inline(always)] - fn as_array_mut_mask8x16(self, a: &mut mask8x16) -> &mut [i8; 16usize] { - unsafe { core::mem::transmute::<&mut v128, &mut [i8; 16usize]>(&mut a.val.0) } - } - #[inline(always)] - fn store_array_mask8x16(self, a: mask8x16, dest: &mut [i8; 16usize]) -> () { - unsafe { - core::ptr::copy_nonoverlapping( - (&raw const a.val.0) as *const i8, - dest.as_mut_ptr(), - 16usize, - ); - } - } - #[inline(always)] - fn cvt_from_bytes_mask8x16(self, a: u8x16) -> mask8x16 { - unsafe { - mask8x16 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask8x16(self, a: mask8x16) -> u8x16 { - unsafe { - u8x16 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask8x16( - self, - a: mask8x16, - b: mask8x16, - ) -> mask8x16 { - if SHIFT >= 16usize { - return b; - } - unsafe { - let result = dyn_slide_128( - self.cvt_to_bytes_mask8x16(a).val.0, - self.cvt_to_bytes_mask8x16(b).val.0, - SHIFT, - ); - self.cvt_from_bytes_mask8x16(u8x16 { - val: crate::support::Aligned128(result), - simd: self, - }) - } - } - #[inline(always)] - fn slide_within_blocks_mask8x16( - self, - a: mask8x16, - b: mask8x16, - ) -> mask8x16 { - self.slide_mask8x16::(a, b) - } - #[inline(always)] fn and_mask8x16(self, a: mask8x16, b: mask8x16) -> mask8x16 { v128_and(a.into(), b.into()).simd_into(self) } @@ -1396,7 +1325,8 @@ impl Simd for WasmSimd128 { ::from(a).simd_into(self) } #[inline(always)] - fn splat_mask16x8(self, val: i16) -> mask16x8 { + fn splat_mask16x8(self, val: bool) -> mask16x8 { + let val: i16 = if val { !0 } else { 0 }; i16x8_splat(val).simd_into(self) } #[inline(always)] @@ -1407,82 +1337,10 @@ impl Simd for WasmSimd128 { } } #[inline(always)] - fn load_array_ref_mask16x8(self, val: &[i16; 8usize]) -> mask16x8 { - mask16x8 { - val: unsafe { core::mem::transmute_copy(val) }, - simd: self, - } - } - #[inline(always)] fn as_array_mask16x8(self, a: mask16x8) -> [i16; 8usize] { unsafe { core::mem::transmute::(a.val.0) } } #[inline(always)] - fn as_array_ref_mask16x8(self, a: &mask16x8) -> &[i16; 8usize] { - unsafe { core::mem::transmute::<&v128, &[i16; 8usize]>(&a.val.0) } - } - #[inline(always)] - fn as_array_mut_mask16x8(self, a: &mut mask16x8) -> &mut [i16; 8usize] { - unsafe { core::mem::transmute::<&mut v128, &mut [i16; 8usize]>(&mut a.val.0) } - } - #[inline(always)] - fn store_array_mask16x8(self, a: mask16x8, dest: &mut [i16; 8usize]) -> () { - unsafe { - core::ptr::copy_nonoverlapping( - (&raw const a.val.0) as *const i16, - dest.as_mut_ptr(), - 8usize, - ); - } - } - #[inline(always)] - fn cvt_from_bytes_mask16x8(self, a: u8x16) -> mask16x8 { - unsafe { - mask16x8 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask16x8(self, a: mask16x8) -> u8x16 { - unsafe { - u8x16 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask16x8( - self, - a: mask16x8, - b: mask16x8, - ) -> mask16x8 { - if SHIFT >= 8usize { - return b; - } - unsafe { - let result = dyn_slide_128( - self.cvt_to_bytes_mask16x8(a).val.0, - self.cvt_to_bytes_mask16x8(b).val.0, - SHIFT * 2usize, - ); - self.cvt_from_bytes_mask16x8(u8x16 { - val: crate::support::Aligned128(result), - simd: self, - }) - } - } - #[inline(always)] - fn slide_within_blocks_mask16x8( - self, - a: mask16x8, - b: mask16x8, - ) -> mask16x8 { - self.slide_mask16x8::(a, b) - } - #[inline(always)] fn and_mask16x8(self, a: mask16x8, b: mask16x8) -> mask16x8 { v128_and(a.into(), b.into()).simd_into(self) } @@ -1960,7 +1818,8 @@ impl Simd for WasmSimd128 { f32x4_convert_u32x4(a.into()).simd_into(self) } #[inline(always)] - fn splat_mask32x4(self, val: i32) -> mask32x4 { + fn splat_mask32x4(self, val: bool) -> mask32x4 { + let val: i32 = if val { !0 } else { 0 }; i32x4_splat(val).simd_into(self) } #[inline(always)] @@ -1971,82 +1830,10 @@ impl Simd for WasmSimd128 { } } #[inline(always)] - fn load_array_ref_mask32x4(self, val: &[i32; 4usize]) -> mask32x4 { - mask32x4 { - val: unsafe { core::mem::transmute_copy(val) }, - simd: self, - } - } - #[inline(always)] fn as_array_mask32x4(self, a: mask32x4) -> [i32; 4usize] { unsafe { core::mem::transmute::(a.val.0) } } #[inline(always)] - fn as_array_ref_mask32x4(self, a: &mask32x4) -> &[i32; 4usize] { - unsafe { core::mem::transmute::<&v128, &[i32; 4usize]>(&a.val.0) } - } - #[inline(always)] - fn as_array_mut_mask32x4(self, a: &mut mask32x4) -> &mut [i32; 4usize] { - unsafe { core::mem::transmute::<&mut v128, &mut [i32; 4usize]>(&mut a.val.0) } - } - #[inline(always)] - fn store_array_mask32x4(self, a: mask32x4, dest: &mut [i32; 4usize]) -> () { - unsafe { - core::ptr::copy_nonoverlapping( - (&raw const a.val.0) as *const i32, - dest.as_mut_ptr(), - 4usize, - ); - } - } - #[inline(always)] - fn cvt_from_bytes_mask32x4(self, a: u8x16) -> mask32x4 { - unsafe { - mask32x4 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask32x4(self, a: mask32x4) -> u8x16 { - unsafe { - u8x16 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask32x4( - self, - a: mask32x4, - b: mask32x4, - ) -> mask32x4 { - if SHIFT >= 4usize { - return b; - } - unsafe { - let result = dyn_slide_128( - self.cvt_to_bytes_mask32x4(a).val.0, - self.cvt_to_bytes_mask32x4(b).val.0, - SHIFT * 4usize, - ); - self.cvt_from_bytes_mask32x4(u8x16 { - val: crate::support::Aligned128(result), - simd: self, - }) - } - } - #[inline(always)] - fn slide_within_blocks_mask32x4( - self, - a: mask32x4, - b: mask32x4, - ) -> mask32x4 { - self.slide_mask32x4::(a, b) - } - #[inline(always)] fn and_mask32x4(self, a: mask32x4, b: mask32x4) -> mask32x4 { v128_and(a.into(), b.into()).simd_into(self) } @@ -2366,7 +2153,8 @@ impl Simd for WasmSimd128 { ::from(a).simd_into(self) } #[inline(always)] - fn splat_mask64x2(self, val: i64) -> mask64x2 { + fn splat_mask64x2(self, val: bool) -> mask64x2 { + let val: i64 = if val { !0 } else { 0 }; i64x2_splat(val).simd_into(self) } #[inline(always)] @@ -2377,82 +2165,10 @@ impl Simd for WasmSimd128 { } } #[inline(always)] - fn load_array_ref_mask64x2(self, val: &[i64; 2usize]) -> mask64x2 { - mask64x2 { - val: unsafe { core::mem::transmute_copy(val) }, - simd: self, - } - } - #[inline(always)] fn as_array_mask64x2(self, a: mask64x2) -> [i64; 2usize] { unsafe { core::mem::transmute::(a.val.0) } } #[inline(always)] - fn as_array_ref_mask64x2(self, a: &mask64x2) -> &[i64; 2usize] { - unsafe { core::mem::transmute::<&v128, &[i64; 2usize]>(&a.val.0) } - } - #[inline(always)] - fn as_array_mut_mask64x2(self, a: &mut mask64x2) -> &mut [i64; 2usize] { - unsafe { core::mem::transmute::<&mut v128, &mut [i64; 2usize]>(&mut a.val.0) } - } - #[inline(always)] - fn store_array_mask64x2(self, a: mask64x2, dest: &mut [i64; 2usize]) -> () { - unsafe { - core::ptr::copy_nonoverlapping( - (&raw const a.val.0) as *const i64, - dest.as_mut_ptr(), - 2usize, - ); - } - } - #[inline(always)] - fn cvt_from_bytes_mask64x2(self, a: u8x16) -> mask64x2 { - unsafe { - mask64x2 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask64x2(self, a: mask64x2) -> u8x16 { - unsafe { - u8x16 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask64x2( - self, - a: mask64x2, - b: mask64x2, - ) -> mask64x2 { - if SHIFT >= 2usize { - return b; - } - unsafe { - let result = dyn_slide_128( - self.cvt_to_bytes_mask64x2(a).val.0, - self.cvt_to_bytes_mask64x2(b).val.0, - SHIFT * 8usize, - ); - self.cvt_from_bytes_mask64x2(u8x16 { - val: crate::support::Aligned128(result), - simd: self, - }) - } - } - #[inline(always)] - fn slide_within_blocks_mask64x2( - self, - a: mask64x2, - b: mask64x2, - ) -> mask64x2 { - self.slide_mask64x2::(a, b) - } - #[inline(always)] fn and_mask64x2(self, a: mask64x2, b: mask64x2) -> mask64x2 { v128_and(a.into(), b.into()).simd_into(self) } @@ -3459,7 +3175,7 @@ impl Simd for WasmSimd128 { ) } #[inline(always)] - fn splat_mask8x32(self, val: i8) -> mask8x32 { + fn splat_mask8x32(self, val: bool) -> mask8x32 { let half = self.splat_mask8x16(val); self.combine_mask8x16(half, half) } @@ -3471,87 +3187,10 @@ impl Simd for WasmSimd128 { } } #[inline(always)] - fn load_array_ref_mask8x32(self, val: &[i8; 32usize]) -> mask8x32 { - mask8x32 { - val: unsafe { core::mem::transmute_copy(val) }, - simd: self, - } - } - #[inline(always)] fn as_array_mask8x32(self, a: mask8x32) -> [i8; 32usize] { unsafe { core::mem::transmute::<[v128; 2usize], [i8; 32usize]>(a.val.0) } } #[inline(always)] - fn as_array_ref_mask8x32(self, a: &mask8x32) -> &[i8; 32usize] { - unsafe { core::mem::transmute::<&[v128; 2usize], &[i8; 32usize]>(&a.val.0) } - } - #[inline(always)] - fn as_array_mut_mask8x32(self, a: &mut mask8x32) -> &mut [i8; 32usize] { - unsafe { core::mem::transmute::<&mut [v128; 2usize], &mut [i8; 32usize]>(&mut a.val.0) } - } - #[inline(always)] - fn store_array_mask8x32(self, a: mask8x32, dest: &mut [i8; 32usize]) -> () { - unsafe { - core::ptr::copy_nonoverlapping( - (&raw const a.val.0) as *const i8, - dest.as_mut_ptr(), - 32usize, - ); - } - } - #[inline(always)] - fn cvt_from_bytes_mask8x32(self, a: u8x32) -> mask8x32 { - unsafe { - mask8x32 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask8x32(self, a: mask8x32) -> u8x32 { - unsafe { - u8x32 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask8x32( - self, - a: mask8x32, - b: mask8x32, - ) -> mask8x32 { - if SHIFT >= 32usize { - return b; - } - unsafe { - let result = cross_block_slide_128x2( - self.cvt_to_bytes_mask8x32(a).val.0, - self.cvt_to_bytes_mask8x32(b).val.0, - SHIFT, - ); - self.cvt_from_bytes_mask8x32(u8x32 { - val: crate::support::Aligned256(result), - simd: self, - }) - } - } - #[inline(always)] - fn slide_within_blocks_mask8x32( - self, - a: mask8x32, - b: mask8x32, - ) -> mask8x32 { - let (a0, a1) = self.split_mask8x32(a); - let (b0, b1) = self.split_mask8x32(b); - self.combine_mask8x16( - self.slide_within_blocks_mask8x16::(a0, b0), - self.slide_within_blocks_mask8x16::(a1, b1), - ) - } - #[inline(always)] fn and_mask8x32(self, a: mask8x32, b: mask8x32) -> mask8x32 { let (a0, a1) = self.split_mask8x32(a); let (b0, b1) = self.split_mask8x32(b); @@ -4218,7 +3857,7 @@ impl Simd for WasmSimd128 { ) } #[inline(always)] - fn splat_mask16x16(self, val: i16) -> mask16x16 { + fn splat_mask16x16(self, val: bool) -> mask16x16 { let half = self.splat_mask16x8(val); self.combine_mask16x8(half, half) } @@ -4230,87 +3869,10 @@ impl Simd for WasmSimd128 { } } #[inline(always)] - fn load_array_ref_mask16x16(self, val: &[i16; 16usize]) -> mask16x16 { - mask16x16 { - val: unsafe { core::mem::transmute_copy(val) }, - simd: self, - } - } - #[inline(always)] fn as_array_mask16x16(self, a: mask16x16) -> [i16; 16usize] { unsafe { core::mem::transmute::<[v128; 2usize], [i16; 16usize]>(a.val.0) } } #[inline(always)] - fn as_array_ref_mask16x16(self, a: &mask16x16) -> &[i16; 16usize] { - unsafe { core::mem::transmute::<&[v128; 2usize], &[i16; 16usize]>(&a.val.0) } - } - #[inline(always)] - fn as_array_mut_mask16x16(self, a: &mut mask16x16) -> &mut [i16; 16usize] { - unsafe { core::mem::transmute::<&mut [v128; 2usize], &mut [i16; 16usize]>(&mut a.val.0) } - } - #[inline(always)] - fn store_array_mask16x16(self, a: mask16x16, dest: &mut [i16; 16usize]) -> () { - unsafe { - core::ptr::copy_nonoverlapping( - (&raw const a.val.0) as *const i16, - dest.as_mut_ptr(), - 16usize, - ); - } - } - #[inline(always)] - fn cvt_from_bytes_mask16x16(self, a: u8x32) -> mask16x16 { - unsafe { - mask16x16 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask16x16(self, a: mask16x16) -> u8x32 { - unsafe { - u8x32 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask16x16( - self, - a: mask16x16, - b: mask16x16, - ) -> mask16x16 { - if SHIFT >= 16usize { - return b; - } - unsafe { - let result = cross_block_slide_128x2( - self.cvt_to_bytes_mask16x16(a).val.0, - self.cvt_to_bytes_mask16x16(b).val.0, - SHIFT * 2usize, - ); - self.cvt_from_bytes_mask16x16(u8x32 { - val: crate::support::Aligned256(result), - simd: self, - }) - } - } - #[inline(always)] - fn slide_within_blocks_mask16x16( - self, - a: mask16x16, - b: mask16x16, - ) -> mask16x16 { - let (a0, a1) = self.split_mask16x16(a); - let (b0, b1) = self.split_mask16x16(b); - self.combine_mask16x8( - self.slide_within_blocks_mask16x8::(a0, b0), - self.slide_within_blocks_mask16x8::(a1, b1), - ) - } - #[inline(always)] fn and_mask16x16(self, a: mask16x16, b: mask16x16) -> mask16x16 { let (a0, a1) = self.split_mask16x16(a); let (b0, b1) = self.split_mask16x16(b); @@ -4970,7 +4532,7 @@ impl Simd for WasmSimd128 { self.combine_f32x4(self.cvt_f32_u32x4(a0), self.cvt_f32_u32x4(a1)) } #[inline(always)] - fn splat_mask32x8(self, val: i32) -> mask32x8 { + fn splat_mask32x8(self, val: bool) -> mask32x8 { let half = self.splat_mask32x4(val); self.combine_mask32x4(half, half) } @@ -4982,87 +4544,10 @@ impl Simd for WasmSimd128 { } } #[inline(always)] - fn load_array_ref_mask32x8(self, val: &[i32; 8usize]) -> mask32x8 { - mask32x8 { - val: unsafe { core::mem::transmute_copy(val) }, - simd: self, - } - } - #[inline(always)] fn as_array_mask32x8(self, a: mask32x8) -> [i32; 8usize] { unsafe { core::mem::transmute::<[v128; 2usize], [i32; 8usize]>(a.val.0) } } #[inline(always)] - fn as_array_ref_mask32x8(self, a: &mask32x8) -> &[i32; 8usize] { - unsafe { core::mem::transmute::<&[v128; 2usize], &[i32; 8usize]>(&a.val.0) } - } - #[inline(always)] - fn as_array_mut_mask32x8(self, a: &mut mask32x8) -> &mut [i32; 8usize] { - unsafe { core::mem::transmute::<&mut [v128; 2usize], &mut [i32; 8usize]>(&mut a.val.0) } - } - #[inline(always)] - fn store_array_mask32x8(self, a: mask32x8, dest: &mut [i32; 8usize]) -> () { - unsafe { - core::ptr::copy_nonoverlapping( - (&raw const a.val.0) as *const i32, - dest.as_mut_ptr(), - 8usize, - ); - } - } - #[inline(always)] - fn cvt_from_bytes_mask32x8(self, a: u8x32) -> mask32x8 { - unsafe { - mask32x8 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask32x8(self, a: mask32x8) -> u8x32 { - unsafe { - u8x32 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask32x8( - self, - a: mask32x8, - b: mask32x8, - ) -> mask32x8 { - if SHIFT >= 8usize { - return b; - } - unsafe { - let result = cross_block_slide_128x2( - self.cvt_to_bytes_mask32x8(a).val.0, - self.cvt_to_bytes_mask32x8(b).val.0, - SHIFT * 4usize, - ); - self.cvt_from_bytes_mask32x8(u8x32 { - val: crate::support::Aligned256(result), - simd: self, - }) - } - } - #[inline(always)] - fn slide_within_blocks_mask32x8( - self, - a: mask32x8, - b: mask32x8, - ) -> mask32x8 { - let (a0, a1) = self.split_mask32x8(a); - let (b0, b1) = self.split_mask32x8(b); - self.combine_mask32x4( - self.slide_within_blocks_mask32x4::(a0, b0), - self.slide_within_blocks_mask32x4::(a1, b1), - ) - } - #[inline(always)] fn and_mask32x8(self, a: mask32x8, b: mask32x8) -> mask32x8 { let (a0, a1) = self.split_mask32x8(a); let (b0, b1) = self.split_mask32x8(b); @@ -5474,97 +4959,20 @@ impl Simd for WasmSimd128 { ) } #[inline(always)] - fn splat_mask64x4(self, val: i64) -> mask64x4 { + fn splat_mask64x4(self, val: bool) -> mask64x4 { let half = self.splat_mask64x2(val); self.combine_mask64x2(half, half) } #[inline(always)] fn load_array_mask64x4(self, val: [i64; 4usize]) -> mask64x4 { mask64x4 { - val: unsafe { core::mem::transmute_copy(&val) }, - simd: self, - } - } - #[inline(always)] - fn load_array_ref_mask64x4(self, val: &[i64; 4usize]) -> mask64x4 { - mask64x4 { - val: unsafe { core::mem::transmute_copy(val) }, - simd: self, - } - } - #[inline(always)] - fn as_array_mask64x4(self, a: mask64x4) -> [i64; 4usize] { - unsafe { core::mem::transmute::<[v128; 2usize], [i64; 4usize]>(a.val.0) } - } - #[inline(always)] - fn as_array_ref_mask64x4(self, a: &mask64x4) -> &[i64; 4usize] { - unsafe { core::mem::transmute::<&[v128; 2usize], &[i64; 4usize]>(&a.val.0) } - } - #[inline(always)] - fn as_array_mut_mask64x4(self, a: &mut mask64x4) -> &mut [i64; 4usize] { - unsafe { core::mem::transmute::<&mut [v128; 2usize], &mut [i64; 4usize]>(&mut a.val.0) } - } - #[inline(always)] - fn store_array_mask64x4(self, a: mask64x4, dest: &mut [i64; 4usize]) -> () { - unsafe { - core::ptr::copy_nonoverlapping( - (&raw const a.val.0) as *const i64, - dest.as_mut_ptr(), - 4usize, - ); - } - } - #[inline(always)] - fn cvt_from_bytes_mask64x4(self, a: u8x32) -> mask64x4 { - unsafe { - mask64x4 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask64x4(self, a: mask64x4) -> u8x32 { - unsafe { - u8x32 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask64x4( - self, - a: mask64x4, - b: mask64x4, - ) -> mask64x4 { - if SHIFT >= 4usize { - return b; - } - unsafe { - let result = cross_block_slide_128x2( - self.cvt_to_bytes_mask64x4(a).val.0, - self.cvt_to_bytes_mask64x4(b).val.0, - SHIFT * 8usize, - ); - self.cvt_from_bytes_mask64x4(u8x32 { - val: crate::support::Aligned256(result), - simd: self, - }) - } - } - #[inline(always)] - fn slide_within_blocks_mask64x4( - self, - a: mask64x4, - b: mask64x4, - ) -> mask64x4 { - let (a0, a1) = self.split_mask64x4(a); - let (b0, b1) = self.split_mask64x4(b); - self.combine_mask64x2( - self.slide_within_blocks_mask64x2::(a0, b0), - self.slide_within_blocks_mask64x2::(a1, b1), - ) + val: unsafe { core::mem::transmute_copy(&val) }, + simd: self, + } + } + #[inline(always)] + fn as_array_mask64x4(self, a: mask64x4) -> [i64; 4usize] { + unsafe { core::mem::transmute::<[v128; 2usize], [i64; 4usize]>(a.val.0) } } #[inline(always)] fn and_mask64x4(self, a: mask64x4, b: mask64x4) -> mask64x4 { @@ -6680,7 +6088,7 @@ impl Simd for WasmSimd128 { ) } #[inline(always)] - fn splat_mask8x64(self, val: i8) -> mask8x64 { + fn splat_mask8x64(self, val: bool) -> mask8x64 { let half = self.splat_mask8x32(val); self.combine_mask8x32(half, half) } @@ -6692,87 +6100,10 @@ impl Simd for WasmSimd128 { } } #[inline(always)] - fn load_array_ref_mask8x64(self, val: &[i8; 64usize]) -> mask8x64 { - mask8x64 { - val: unsafe { core::mem::transmute_copy(val) }, - simd: self, - } - } - #[inline(always)] fn as_array_mask8x64(self, a: mask8x64) -> [i8; 64usize] { unsafe { core::mem::transmute::<[v128; 4usize], [i8; 64usize]>(a.val.0) } } #[inline(always)] - fn as_array_ref_mask8x64(self, a: &mask8x64) -> &[i8; 64usize] { - unsafe { core::mem::transmute::<&[v128; 4usize], &[i8; 64usize]>(&a.val.0) } - } - #[inline(always)] - fn as_array_mut_mask8x64(self, a: &mut mask8x64) -> &mut [i8; 64usize] { - unsafe { core::mem::transmute::<&mut [v128; 4usize], &mut [i8; 64usize]>(&mut a.val.0) } - } - #[inline(always)] - fn store_array_mask8x64(self, a: mask8x64, dest: &mut [i8; 64usize]) -> () { - unsafe { - core::ptr::copy_nonoverlapping( - (&raw const a.val.0) as *const i8, - dest.as_mut_ptr(), - 64usize, - ); - } - } - #[inline(always)] - fn cvt_from_bytes_mask8x64(self, a: u8x64) -> mask8x64 { - unsafe { - mask8x64 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask8x64(self, a: mask8x64) -> u8x64 { - unsafe { - u8x64 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask8x64( - self, - a: mask8x64, - b: mask8x64, - ) -> mask8x64 { - if SHIFT >= 64usize { - return b; - } - unsafe { - let result = cross_block_slide_128x4( - self.cvt_to_bytes_mask8x64(a).val.0, - self.cvt_to_bytes_mask8x64(b).val.0, - SHIFT, - ); - self.cvt_from_bytes_mask8x64(u8x64 { - val: crate::support::Aligned512(result), - simd: self, - }) - } - } - #[inline(always)] - fn slide_within_blocks_mask8x64( - self, - a: mask8x64, - b: mask8x64, - ) -> mask8x64 { - let (a0, a1) = self.split_mask8x64(a); - let (b0, b1) = self.split_mask8x64(b); - self.combine_mask8x32( - self.slide_within_blocks_mask8x32::(a0, b0), - self.slide_within_blocks_mask8x32::(a1, b1), - ) - } - #[inline(always)] fn and_mask8x64(self, a: mask8x64, b: mask8x64) -> mask8x64 { let (a0, a1) = self.split_mask8x64(a); let (b0, b1) = self.split_mask8x64(b); @@ -7474,7 +6805,7 @@ impl Simd for WasmSimd128 { ) } #[inline(always)] - fn splat_mask16x32(self, val: i16) -> mask16x32 { + fn splat_mask16x32(self, val: bool) -> mask16x32 { let half = self.splat_mask16x16(val); self.combine_mask16x16(half, half) } @@ -7486,87 +6817,10 @@ impl Simd for WasmSimd128 { } } #[inline(always)] - fn load_array_ref_mask16x32(self, val: &[i16; 32usize]) -> mask16x32 { - mask16x32 { - val: unsafe { core::mem::transmute_copy(val) }, - simd: self, - } - } - #[inline(always)] fn as_array_mask16x32(self, a: mask16x32) -> [i16; 32usize] { unsafe { core::mem::transmute::<[v128; 4usize], [i16; 32usize]>(a.val.0) } } #[inline(always)] - fn as_array_ref_mask16x32(self, a: &mask16x32) -> &[i16; 32usize] { - unsafe { core::mem::transmute::<&[v128; 4usize], &[i16; 32usize]>(&a.val.0) } - } - #[inline(always)] - fn as_array_mut_mask16x32(self, a: &mut mask16x32) -> &mut [i16; 32usize] { - unsafe { core::mem::transmute::<&mut [v128; 4usize], &mut [i16; 32usize]>(&mut a.val.0) } - } - #[inline(always)] - fn store_array_mask16x32(self, a: mask16x32, dest: &mut [i16; 32usize]) -> () { - unsafe { - core::ptr::copy_nonoverlapping( - (&raw const a.val.0) as *const i16, - dest.as_mut_ptr(), - 32usize, - ); - } - } - #[inline(always)] - fn cvt_from_bytes_mask16x32(self, a: u8x64) -> mask16x32 { - unsafe { - mask16x32 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask16x32(self, a: mask16x32) -> u8x64 { - unsafe { - u8x64 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask16x32( - self, - a: mask16x32, - b: mask16x32, - ) -> mask16x32 { - if SHIFT >= 32usize { - return b; - } - unsafe { - let result = cross_block_slide_128x4( - self.cvt_to_bytes_mask16x32(a).val.0, - self.cvt_to_bytes_mask16x32(b).val.0, - SHIFT * 2usize, - ); - self.cvt_from_bytes_mask16x32(u8x64 { - val: crate::support::Aligned512(result), - simd: self, - }) - } - } - #[inline(always)] - fn slide_within_blocks_mask16x32( - self, - a: mask16x32, - b: mask16x32, - ) -> mask16x32 { - let (a0, a1) = self.split_mask16x32(a); - let (b0, b1) = self.split_mask16x32(b); - self.combine_mask16x16( - self.slide_within_blocks_mask16x16::(a0, b0), - self.slide_within_blocks_mask16x16::(a1, b1), - ) - } - #[inline(always)] fn and_mask16x32(self, a: mask16x32, b: mask16x32) -> mask16x32 { let (a0, a1) = self.split_mask16x32(a); let (b0, b1) = self.split_mask16x32(b); @@ -8250,7 +7504,7 @@ impl Simd for WasmSimd128 { self.combine_f32x8(self.cvt_f32_u32x8(a0), self.cvt_f32_u32x8(a1)) } #[inline(always)] - fn splat_mask32x16(self, val: i32) -> mask32x16 { + fn splat_mask32x16(self, val: bool) -> mask32x16 { let half = self.splat_mask32x8(val); self.combine_mask32x8(half, half) } @@ -8262,87 +7516,10 @@ impl Simd for WasmSimd128 { } } #[inline(always)] - fn load_array_ref_mask32x16(self, val: &[i32; 16usize]) -> mask32x16 { - mask32x16 { - val: unsafe { core::mem::transmute_copy(val) }, - simd: self, - } - } - #[inline(always)] fn as_array_mask32x16(self, a: mask32x16) -> [i32; 16usize] { unsafe { core::mem::transmute::<[v128; 4usize], [i32; 16usize]>(a.val.0) } } #[inline(always)] - fn as_array_ref_mask32x16(self, a: &mask32x16) -> &[i32; 16usize] { - unsafe { core::mem::transmute::<&[v128; 4usize], &[i32; 16usize]>(&a.val.0) } - } - #[inline(always)] - fn as_array_mut_mask32x16(self, a: &mut mask32x16) -> &mut [i32; 16usize] { - unsafe { core::mem::transmute::<&mut [v128; 4usize], &mut [i32; 16usize]>(&mut a.val.0) } - } - #[inline(always)] - fn store_array_mask32x16(self, a: mask32x16, dest: &mut [i32; 16usize]) -> () { - unsafe { - core::ptr::copy_nonoverlapping( - (&raw const a.val.0) as *const i32, - dest.as_mut_ptr(), - 16usize, - ); - } - } - #[inline(always)] - fn cvt_from_bytes_mask32x16(self, a: u8x64) -> mask32x16 { - unsafe { - mask32x16 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask32x16(self, a: mask32x16) -> u8x64 { - unsafe { - u8x64 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask32x16( - self, - a: mask32x16, - b: mask32x16, - ) -> mask32x16 { - if SHIFT >= 16usize { - return b; - } - unsafe { - let result = cross_block_slide_128x4( - self.cvt_to_bytes_mask32x16(a).val.0, - self.cvt_to_bytes_mask32x16(b).val.0, - SHIFT * 4usize, - ); - self.cvt_from_bytes_mask32x16(u8x64 { - val: crate::support::Aligned512(result), - simd: self, - }) - } - } - #[inline(always)] - fn slide_within_blocks_mask32x16( - self, - a: mask32x16, - b: mask32x16, - ) -> mask32x16 { - let (a0, a1) = self.split_mask32x16(a); - let (b0, b1) = self.split_mask32x16(b); - self.combine_mask32x8( - self.slide_within_blocks_mask32x8::(a0, b0), - self.slide_within_blocks_mask32x8::(a1, b1), - ) - } - #[inline(always)] fn and_mask32x16(self, a: mask32x16, b: mask32x16) -> mask32x16 { let (a0, a1) = self.split_mask32x16(a); let (b0, b1) = self.split_mask32x16(b); @@ -8740,7 +7917,7 @@ impl Simd for WasmSimd128 { ) } #[inline(always)] - fn splat_mask64x8(self, val: i64) -> mask64x8 { + fn splat_mask64x8(self, val: bool) -> mask64x8 { let half = self.splat_mask64x4(val); self.combine_mask64x4(half, half) } @@ -8752,87 +7929,10 @@ impl Simd for WasmSimd128 { } } #[inline(always)] - fn load_array_ref_mask64x8(self, val: &[i64; 8usize]) -> mask64x8 { - mask64x8 { - val: unsafe { core::mem::transmute_copy(val) }, - simd: self, - } - } - #[inline(always)] fn as_array_mask64x8(self, a: mask64x8) -> [i64; 8usize] { unsafe { core::mem::transmute::<[v128; 4usize], [i64; 8usize]>(a.val.0) } } #[inline(always)] - fn as_array_ref_mask64x8(self, a: &mask64x8) -> &[i64; 8usize] { - unsafe { core::mem::transmute::<&[v128; 4usize], &[i64; 8usize]>(&a.val.0) } - } - #[inline(always)] - fn as_array_mut_mask64x8(self, a: &mut mask64x8) -> &mut [i64; 8usize] { - unsafe { core::mem::transmute::<&mut [v128; 4usize], &mut [i64; 8usize]>(&mut a.val.0) } - } - #[inline(always)] - fn store_array_mask64x8(self, a: mask64x8, dest: &mut [i64; 8usize]) -> () { - unsafe { - core::ptr::copy_nonoverlapping( - (&raw const a.val.0) as *const i64, - dest.as_mut_ptr(), - 8usize, - ); - } - } - #[inline(always)] - fn cvt_from_bytes_mask64x8(self, a: u8x64) -> mask64x8 { - unsafe { - mask64x8 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn cvt_to_bytes_mask64x8(self, a: mask64x8) -> u8x64 { - unsafe { - u8x64 { - val: core::mem::transmute(a.val), - simd: self, - } - } - } - #[inline(always)] - fn slide_mask64x8( - self, - a: mask64x8, - b: mask64x8, - ) -> mask64x8 { - if SHIFT >= 8usize { - return b; - } - unsafe { - let result = cross_block_slide_128x4( - self.cvt_to_bytes_mask64x8(a).val.0, - self.cvt_to_bytes_mask64x8(b).val.0, - SHIFT * 8usize, - ); - self.cvt_from_bytes_mask64x8(u8x64 { - val: crate::support::Aligned512(result), - simd: self, - }) - } - } - #[inline(always)] - fn slide_within_blocks_mask64x8( - self, - a: mask64x8, - b: mask64x8, - ) -> mask64x8 { - let (a0, a1) = self.split_mask64x8(a); - let (b0, b1) = self.split_mask64x8(b); - self.combine_mask64x4( - self.slide_within_blocks_mask64x4::(a0, b0), - self.slide_within_blocks_mask64x4::(a1, b1), - ) - } - #[inline(always)] fn and_mask64x8(self, a: mask64x8, b: mask64x8) -> mask64x8 { let (a0, a1) = self.split_mask64x8(a); let (b0, b1) = self.split_mask64x8(b); diff --git a/fearless_simd/src/traits.rs b/fearless_simd/src/traits.rs index 06a2f3a3..e51b0a5f 100644 --- a/fearless_simd/src/traits.rs +++ b/fearless_simd/src/traits.rs @@ -9,11 +9,12 @@ use crate::{Level, Simd, SimdBase, seal::Seal}; /// Element-wise selection between two SIMD vectors using `self`. pub trait Select: Seal { - /// For each element of this mask, select the first operand if the element is all ones, and select the second - /// operand if the element is all zeroes. + /// For each logical lane of this mask, select the first operand if the lane is true, and select the second + /// operand if the lane is false. /// - /// If a mask element is *not* all ones or all zeroes, the result is unspecified. It may vary depending on - /// architecture, feature level, the mask elements' width, the mask vector's width, or library version. + /// Masks may be converted to and from signed integer lane arrays for compatibility with older APIs. For those + /// conversions, false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1). + /// If a mask is constructed from any other integer bit pattern, the result of this operation is unspecified. fn select(self, if_true: T, if_false: T) -> T; } diff --git a/fearless_simd_gen/src/mk_fallback.rs b/fearless_simd_gen/src/mk_fallback.rs index d3258df2..240e28ad 100644 --- a/fearless_simd_gen/src/mk_fallback.rs +++ b/fearless_simd_gen/src/mk_fallback.rs @@ -136,8 +136,17 @@ impl Level for Fallback { match sig { OpSig::Splat => { let num_elements = vec_ty.len; + let normalize_mask = if vec_ty.scalar == ScalarType::Mask { + let scalar = vec_ty.scalar.rust(vec_ty.scalar_bits); + quote! { + let val: #scalar = if val { !0 } else { 0 }; + } + } else { + quote! {} + }; quote! { #method_sig { + #normalize_mask [val; #num_elements].simd_into(self) } } @@ -146,7 +155,7 @@ impl Level for Fallback { let items = make_list( (0..vec_ty.len) .map(|idx| { - let args = [quote! { a[#idx] }]; + let args = [lane(quote! { a }, vec_ty, idx)]; let expr = fallback::expr(method, vec_ty, &args); quote! { #expr } }) @@ -164,7 +173,8 @@ impl Level for Fallback { (0..vec_ty.len) .map(|idx| { let scalar_ty = target_ty.scalar.rust(target_ty.scalar_bits); - quote! { a[#idx] as #scalar_ty } + let a = lane(quote! { a }, vec_ty, idx); + quote! { #a as #scalar_ty } }) .collect::>(), ); @@ -179,6 +189,7 @@ impl Level for Fallback { let items = make_list( (0..vec_ty.len) .map(|idx| { + let b_lane = lane(quote! { b }, vec_ty, idx); let b = if fallback::translate_op( method, vec_ty.scalar == ScalarType::Float, @@ -186,12 +197,12 @@ impl Level for Fallback { .map(rhs_reference) .unwrap_or(true) { - quote! { &b[#idx] } + quote! { &#b_lane } } else { - quote! { b[#idx] } + b_lane }; - let args = [quote! { a[#idx] }, quote! { #b }]; + let args = [lane(quote! { a }, vec_ty, idx), quote! { #b }]; let expr = fallback::expr(method, vec_ty, &args); quote! { #expr } }) @@ -208,7 +219,7 @@ impl Level for Fallback { let items = make_list( (0..vec_ty.len) .map(|idx| { - let args = [quote! { a[#idx] }, quote! { shift }]; + let args = [lane(quote! { a }, vec_ty, idx), quote! { shift }]; let expr = fallback::expr(method, vec_ty, &args); quote! { #expr } }) @@ -254,7 +265,9 @@ impl Level for Fallback { let items = make_list( (0..vec_ty.len) .map(|idx: usize| { - let args = [quote! { &a[#idx] }, quote! { &b[#idx] }]; + let a = lane(quote! { a }, vec_ty, idx); + let b = lane(quote! { b }, vec_ty, idx); + let args = [quote! { &#a }, quote! { &#b }]; let expr = fallback::expr(method, vec_ty, &args); let mask_ty = mask_type.scalar.rust(vec_ty.scalar_bits); quote! { -(#expr as #mask_ty) } @@ -269,10 +282,14 @@ impl Level for Fallback { } } OpSig::Select => { + let mask_type = vec_ty.mask_ty(); let items = make_list( (0..vec_ty.len) .map(|idx| { - quote! { if a[#idx] != 0 { b[#idx] } else { c[#idx] } } + let a = lane(quote! { a }, &mask_type, idx); + let b = lane(quote! { b }, vec_ty, idx); + let c = lane(quote! { c }, vec_ty, idx); + quote! { if #a != 0 { #b } else { #c } } }) .collect::>(), ); @@ -326,7 +343,9 @@ impl Level for Fallback { let zip = make_list( indices .map(|idx| { - quote! {a[#idx], b[#idx] } + let a = lane(quote! { a }, vec_ty, idx); + let b = lane(quote! { b }, vec_ty, idx); + quote! { #a, #b } }) .collect::>(), ); @@ -347,12 +366,8 @@ impl Level for Fallback { let unzip = make_list( indices .clone() - .map(|idx| { - quote! {a[#idx]} - }) - .chain(indices.map(|idx| { - quote! {b[#idx]} - })) + .map(|idx| lane(quote! { a }, vec_ty, idx)) + .chain(indices.map(|idx| lane(quote! { b }, vec_ty, idx))) .collect::>(), ); @@ -392,7 +407,8 @@ impl Level for Fallback { let items = make_list( (0..vec_ty.len) .map(|idx| { - quote! { a[#idx] as #scalar } + let a = lane(quote! { a }, vec_ty, idx); + quote! { #a as #scalar } }) .collect::>(), ); @@ -421,7 +437,6 @@ impl Level for Fallback { quantifier, condition, } => { - let indices = (0..vec_ty.len).map(|idx| quote! { #idx }); let check = if condition { quote! { != } } else { @@ -430,10 +445,12 @@ impl Level for Fallback { let expr = match quantifier { crate::ops::Quantifier::Any => { - quote! { #(a[#indices] #check 0)||* } + let lanes = (0..vec_ty.len).map(|idx| lane(quote! { a }, vec_ty, idx)); + quote! { #(#lanes #check 0)||* } } crate::ops::Quantifier::All => { - quote! { #(a[#indices] #check 0)&&* } + let lanes = (0..vec_ty.len).map(|idx| lane(quote! { a }, vec_ty, idx)); + quote! { #(#lanes #check 0)&&* } } }; @@ -540,6 +557,14 @@ fn interleave_indices( make_list(indices.into_iter().map(func).collect::>()) } +fn lane(value: TokenStream, vec_ty: &VecType, idx: usize) -> TokenStream { + if vec_ty.scalar == ScalarType::Mask { + quote! { #value.val.0[#idx] } + } else { + quote! { #value[#idx] } + } +} + /// Whether the second argument of the function needs to be passed by reference. fn rhs_reference(method: &str) -> bool { !matches!( diff --git a/fearless_simd_gen/src/mk_neon.rs b/fearless_simd_gen/src/mk_neon.rs index 4b2f5f75..143f1878 100644 --- a/fearless_simd_gen/src/mk_neon.rs +++ b/fearless_simd_gen/src/mk_neon.rs @@ -82,9 +82,18 @@ impl Level for Neon { match sig { OpSig::Splat => { let expr = neon::expr(method, vec_ty, &[quote! { val }]); + let normalize_mask = if vec_ty.scalar == ScalarType::Mask { + let scalar = vec_ty.scalar.rust(vec_ty.scalar_bits); + quote! { + let val: #scalar = if val { !0 } else { 0 }; + } + } else { + quote! {} + }; quote! { #method_sig { unsafe { + #normalize_mask #expr.simd_into(self) } } diff --git a/fearless_simd_gen/src/mk_ops.rs b/fearless_simd_gen/src/mk_ops.rs index f5ec724b..20ddc4c2 100644 --- a/fearless_simd_gen/src/mk_ops.rs +++ b/fearless_simd_gen/src/mk_ops.rs @@ -7,7 +7,7 @@ use quote::{format_ident, quote}; use crate::{ generic::generic_op_name, ops::{CoreOpTrait, OpKind, OpSig, TyFlavor, overloaded_ops_for}, - types::{SIMD_TYPES, type_imports}, + types::{SIMD_TYPES, ScalarType, type_imports}, }; pub(crate) fn mk_ops() -> TokenStream { @@ -85,6 +85,32 @@ pub(crate) fn mk_ops() -> TokenStream { } _ => { let scalar = ty.scalar.rust(ty.scalar_bits); + let scalar_overloads = (ty.scalar != ScalarType::Mask).then(|| { + quote! { + impl core::ops::#trait_id<#scalar> for #simd { + type Output = Self; + #[inline(always)] + fn #opfn(self, rhs: #scalar) -> Self::Output { + self.simd.#simd_fn(self, rhs.simd_into(self.simd)) + } + } + + impl core::ops::#trait_assign_id<#scalar> for #simd { + #[inline(always)] + fn #op_assign_fn(&mut self, rhs: #scalar) { + *self = self.simd.#simd_fn(*self, rhs.simd_into(self.simd)); + } + } + + impl core::ops::#trait_id<#simd> for #scalar { + type Output = #simd; + #[inline(always)] + fn #opfn(self, rhs: #simd) -> Self::Output { + rhs.simd.#simd_fn(self.simd_into(rhs.simd), rhs) + } + } + } + }); impls.push(quote! { impl core::ops::#trait_id for #simd { type Output = Self; @@ -103,28 +129,7 @@ pub(crate) fn mk_ops() -> TokenStream { } } - impl core::ops::#trait_id<#scalar> for #simd { - type Output = Self; - #[inline(always)] - fn #opfn(self, rhs: #scalar) -> Self::Output { - self.simd.#simd_fn(self, rhs.simd_into(self.simd)) - } - } - - impl core::ops::#trait_assign_id<#scalar> for #simd { - #[inline(always)] - fn #op_assign_fn(&mut self, rhs: #scalar) { - *self = self.simd.#simd_fn(*self, rhs.simd_into(self.simd)); - } - } - - impl core::ops::#trait_id<#simd> for #scalar { - type Output = #simd; - #[inline(always)] - fn #opfn(self, rhs: #simd) -> Self::Output { - rhs.simd.#simd_fn(self.simd_into(rhs.simd), rhs) - } - } + #scalar_overloads }); } } diff --git a/fearless_simd_gen/src/mk_simd_trait.rs b/fearless_simd_gen/src/mk_simd_trait.rs index 53ae7b1b..a973c01b 100644 --- a/fearless_simd_gen/src/mk_simd_trait.rs +++ b/fearless_simd_gen/src/mk_simd_trait.rs @@ -1,11 +1,14 @@ // Copyright 2025 the Fearless_SIMD Authors // SPDX-License-Identifier: Apache-2.0 OR MIT -use proc_macro2::TokenStream; -use quote::quote; +use proc_macro2::{Ident, Span, TokenStream}; +use quote::{format_ident, quote}; use crate::{ - ops::{OpKind, TyFlavor, base_trait_ops, ops_for_type, overloaded_ops_for, vec_trait_ops_for}, + ops::{ + CoreOpTrait, OpKind, OpSig, TyFlavor, base_trait_ops, ops_for_type, overloaded_ops_for, + vec_trait_ops_for, + }, types::{SIMD_TYPES, ScalarType, type_imports}, }; @@ -80,14 +83,13 @@ pub(crate) fn mk_simd_trait() -> TokenStream { type i32s: SimdInt, Mask = Self::mask32s, Bytes = ::Bytes> + SimdCvtTruncate + core::ops::Neg; /// A native-width SIMD mask with 8-bit lanes. - type mask8s: SimdMask, Bytes = ::Bytes> + Select + Select + Select; + type mask8s: SimdMask + Select + Select + Select; /// A native-width SIMD mask with 16-bit lanes. - type mask16s: SimdMask, Bytes = ::Bytes> + Select + Select + Select; + type mask16s: SimdMask + Select + Select + Select; /// A native-width SIMD mask with 32-bit lanes. - type mask32s: SimdMask, Bytes = ::Bytes> - + Select + Select + Select + Select; + type mask32s: SimdMask + Select + Select + Select + Select; /// A native-width SIMD mask with 64-bit lanes. - type mask64s: SimdMask> + Select + Select; + type mask64s: SimdMask + Select + Select; /// This SIMD token's feature level. fn level(self) -> Level; @@ -156,14 +158,11 @@ fn mk_simd_base() -> TokenStream { /// working with a native-width vector (e.g. [`Simd::f32s`]) and /// want to process data in native-width chunks. const N: usize; - /// A SIMD vector mask with the same number of elements. + /// A SIMD vector mask with the same number of logical lanes. /// - /// The mask element is represented as an integer which is - /// all-0 for `false` and all-1 for `true`. When we get deep - /// into AVX-512, we need to think about predication masks. - /// - /// One possibility to consider is that the SIMD trait grows - /// `maskAxB` associated types. + /// Masks intentionally do not implement [`SimdBase`]. SSE, NEON, WASM, and the + /// fallback backend currently store masks as all-zero/all-one integer vectors, but + /// AVX-512/RVV/SVE-style targets use compact predicate registers instead. type Mask: SimdMask::Mask>; /// A 128-bit SIMD vector of the same scalar type. type Block: SimdBase; @@ -271,12 +270,54 @@ fn mk_simd_mask() -> TokenStream { OpKind::Overloaded(core_op) => Some(core_op), _ => None, }) - .flat_map(|core_op| core_op.trait_bounds()); + .flat_map(|core_op| { + let trait_name = Ident::new(core_op.trait_name(), Span::call_site()); + let trait_name_assign = format_ident!("{trait_name}Assign"); + match core_op { + CoreOpTrait::Not => vec![quote! { core::ops::#trait_name }], + _ => vec![ + quote! { core::ops::#trait_name }, + quote! { core::ops::#trait_name_assign }, + ], + } + }); quote! { /// Functionality implemented by SIMD masks. - pub trait SimdMask: SimdBase + Seal + /// + /// A mask has one logical boolean lane per SIMD lane. Its storage is intentionally opaque: + /// current backends may use all-zero/all-one integer vectors internally, while future + /// predicate-register backends may use a compact representation. + pub trait SimdMask: + Copy + Sync + Send + 'static + + Seal + + Select #(+ #op_traits)* { + /// The signed integer type used when converting this mask to and from lane values. + /// + /// False lanes are encoded as all zeroes (integer value 0), and true lanes are encoded as all ones + /// (integer value -1). + type Element: SimdElement; + + /// This mask type's lane count. + const N: usize; + + /// Get the [`Simd`] implementation associated with this type. + fn witness(&self) -> S; + + /// Create a SIMD mask with all lanes set to the given boolean value. + fn splat(simd: S, val: bool) -> Self; + + /// Create a SIMD mask from signed integer mask lanes. + /// + /// The slice must be exactly the size of the SIMD mask. + fn from_slice(simd: S, slice: &[Self::Element]) -> Self; + + /// Store this SIMD mask as signed integer mask lanes. + /// + /// The slice must be exactly the size of the SIMD mask. + fn store_slice(&self, slice: &mut [Self::Element]); + #( #methods )* } } @@ -286,7 +327,12 @@ fn methods_for_vec_trait(scalar: ScalarType) -> Vec { let mut methods = vec![]; for op in vec_trait_ops_for(scalar) { let doc = op.format_docstring(TyFlavor::VecImpl); - if let Some(method_sig) = op.vec_trait_method_sig() { + let method_sig = if scalar == ScalarType::Mask && matches!(op.sig, OpSig::Compare) { + Some(quote! { fn simd_eq(self, rhs: impl SimdInto) -> Self }) + } else { + op.vec_trait_method_sig() + }; + if let Some(method_sig) = method_sig { methods.push(quote! { #[doc = #doc] #method_sig; diff --git a/fearless_simd_gen/src/mk_simd_types.rs b/fearless_simd_gen/src/mk_simd_types.rs index 76974a48..22d48417 100644 --- a/fearless_simd_gen/src/mk_simd_types.rs +++ b/fearless_simd_gen/src/mk_simd_types.rs @@ -7,8 +7,8 @@ use quote::{format_ident, quote}; use crate::{ generic::generic_op_name, ops::{ - F32_TO_I32, F32_TO_I32_PRECISE, F32_TO_U32, F32_TO_U32_PRECISE, I32_TO_F32, Op, TyFlavor, - U32_TO_F32, vec_trait_ops_for, + F32_TO_I32, F32_TO_I32_PRECISE, F32_TO_U32, F32_TO_U32_PRECISE, I32_TO_F32, Op, OpSig, + TyFlavor, U32_TO_F32, vec_trait_ops_for, }, types::{SIMD_TYPES, ScalarType, VecType}, }; @@ -35,6 +35,59 @@ pub(crate) fn mk_simd_types() -> TokenStream { let bytes = VecType::new(ScalarType::Unsigned, 8, align).rust(); let mask = ty.mask_ty().rust(); + if ty.scalar == ScalarType::Mask { + let splat = Ident::new(&format!("splat_{}", ty.rust_name()), Span::call_site()); + let impl_block = simd_mask_impl(ty); + result.extend(quote! { + #[doc = #doc] + #[derive(Clone, Copy)] + pub struct #name { + pub(crate) val: S::#name, + pub(crate) simd: S, + } + + impl Seal for #name {} + + impl SimdFrom<[#rust_scalar; #len], S> for #name { + #[inline(always)] + fn simd_from(simd: S, val: [#rust_scalar; #len]) -> Self { + simd.#from_array_op(val) + } + } + + impl From<#name> for [#rust_scalar; #len] { + #[inline(always)] + fn from(value: #name) -> Self { + value.simd.#as_array_op(value) + } + } + + impl core::fmt::Debug for #name { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let lanes = self.simd.#as_array_op(*self); + crate::support::simd_debug_impl(f, #name_str, &self.simd, &lanes) + } + } + + impl SimdFrom for #name { + #[inline(always)] + fn simd_from(simd: S, value: bool) -> Self { + simd.#splat(value) + } + } + + impl Select<#name> for #name { + #[inline(always)] + fn select(self, if_true: #name, if_false: #name) -> #name { + self.simd.#select(self, if_true, if_false) + } + } + + #impl_block + }); + continue; + } + let scalar_impl = { let splat = Ident::new(&format!("splat_{}", ty.rust_name()), Span::call_site()); quote! { @@ -238,6 +291,67 @@ pub(crate) fn mk_simd_types() -> TokenStream { result } +fn simd_mask_impl(ty: &VecType) -> TokenStream { + let name = ty.rust(); + let scalar = ty.scalar.rust(ty.scalar_bits); + let len = Literal::usize_unsuffixed(ty.len); + let splat = generic_op_name("splat", ty); + let from_array_op = generic_op_name("load_array", ty); + let as_array_op = generic_op_name("as_array", ty); + let mut methods = vec![]; + for op in vec_trait_ops_for(ty.scalar) { + let Op { sig, method, .. } = op; + let trait_method = generic_op_name(method, ty); + let method_sig = if matches!(sig, OpSig::Compare) { + Some(quote! { fn simd_eq(self, rhs: impl SimdInto) -> Self }) + } else { + op.vec_trait_method_sig() + }; + if let Some(method_sig) = method_sig { + let call_args = sig + .forwarding_call_args() + .expect("this method can be forwarded to a specific Simd function"); + methods.push(quote! { + #[inline(always)] + #method_sig { + self.simd.#trait_method(#call_args) + } + }); + } + } + + quote! { + impl crate::SimdMask for #name { + type Element = #scalar; + const N: usize = #len; + + #[inline(always)] + fn witness(&self) -> S { + self.simd + } + + #[inline(always)] + fn splat(simd: S, val: bool) -> Self { + simd.#splat(val) + } + + #[inline(always)] + fn from_slice(simd: S, slice: &[#scalar]) -> Self { + let slice: &[#scalar; #len] = slice.try_into().unwrap(); + simd.#from_array_op(*slice) + } + + #[inline(always)] + fn store_slice(&self, slice: &mut [#scalar]) { + let slice: &mut [#scalar; #len] = slice.try_into().unwrap(); + *slice = self.simd.#as_array_op(*self); + } + + #( #methods )* + } + } +} + fn simd_vec_impl(ty: &VecType) -> TokenStream { let name = ty.rust(); let scalar = ty.scalar.rust(ty.scalar_bits); diff --git a/fearless_simd_gen/src/mk_wasm.rs b/fearless_simd_gen/src/mk_wasm.rs index 13ab4fae..a66d3bf3 100644 --- a/fearless_simd_gen/src/mk_wasm.rs +++ b/fearless_simd_gen/src/mk_wasm.rs @@ -71,8 +71,17 @@ impl Level for WasmSimd128 { match sig { OpSig::Splat => { let expr = wasm::expr(method, vec_ty, &[quote! { val }]); + let normalize_mask = if vec_ty.scalar == ScalarType::Mask { + let scalar = vec_ty.scalar.rust(vec_ty.scalar_bits); + quote! { + let val: #scalar = if val { !0 } else { 0 }; + } + } else { + quote! {} + }; quote! { #method_sig { + #normalize_mask #expr.simd_into(self) } } diff --git a/fearless_simd_gen/src/mk_x86.rs b/fearless_simd_gen/src/mk_x86.rs index 3389393f..65456cad 100644 --- a/fearless_simd_gen/src/mk_x86.rs +++ b/fearless_simd_gen/src/mk_x86.rs @@ -49,6 +49,12 @@ impl Level for X86 { } fn arch_ty(&self, vec_ty: &VecType) -> TokenStream { + // Future AVX-512 backends should be able to keep mask types opaque by storing them as + // `__mmask*` predicate registers instead of `__m*i` vectors: for example, `mask8x64` + // maps naturally to `__mmask64`, `mask16x32` to `__mmask32`, and `mask32x16`/`mask64x8` + // to `__mmask16`/`__mmask8`. Comparisons would return `_mm512_cmp*_mask`, selects would + // use `_mm512_mask_blend_*`, and legacy integer-lane interop could materialize vectors + // with `_mm512_movm_epi*` only at the API boundary. let suffix = match (vec_ty.scalar, vec_ty.scalar_bits) { (ScalarType::Float, 32) => "", (ScalarType::Float, 64) => "d", @@ -224,9 +230,18 @@ impl X86 { ScalarType::Unsigned => quote!(.cast_signed()), _ => quote!(), }; + let normalize_mask = if vec_ty.scalar == ScalarType::Mask { + let scalar = vec_ty.scalar.rust(vec_ty.scalar_bits); + quote! { + let val: #scalar = if val { !0 } else { 0 }; + } + } else { + quote! {} + }; quote! { #method_sig { unsafe { + #normalize_mask #intrinsic(val #cast).simd_into(self) } } @@ -329,6 +344,15 @@ impl X86 { } } } + "not" if vec_ty.scalar == ScalarType::Mask => { + let xor_op = generic_op_name("xor", vec_ty); + let splat_op = generic_op_name("splat", vec_ty); + quote! { + #method_sig { + self.#xor_op(a, self.#splat_op(true)) + } + } + } "not" => { quote! { #method_sig { diff --git a/fearless_simd_gen/src/ops.rs b/fearless_simd_gen/src/ops.rs index 85907d4c..521b0b5d 100644 --- a/fearless_simd_gen/src/ops.rs +++ b/fearless_simd_gen/src/ops.rs @@ -51,7 +51,8 @@ pub(crate) enum SlideGranularity { #[derive(Clone, Copy)] pub(crate) enum OpSig { - /// Takes a single argument of the underlying SIMD element type, and returns the corresponding vector type. + /// Takes a single scalar argument, and returns the corresponding vector type. + /// Mask splats take a boolean and convert it to the backend's mask representation. Splat, /// Takes a single argument of the vector type, and returns that same vector type. Unary, @@ -182,8 +183,8 @@ impl Op { let sig_inner = match &self.sig { OpSig::Splat => { let arg0 = &arg_names[0]; - let scalar = vec_ty.scalar.rust(vec_ty.scalar_bits); - quote! { (self, #arg0: #scalar) -> #ty } + let arg_ty = splat_arg_ty(vec_ty); + quote! { (self, #arg0: #arg_ty) -> #ty } } OpSig::LoadInterleaved { block_size, @@ -443,6 +444,14 @@ impl Op { } } +fn splat_arg_ty(vec_ty: &VecType) -> TokenStream { + if vec_ty.scalar == ScalarType::Mask { + quote! { bool } + } else { + vec_ty.scalar.rust(vec_ty.scalar_bits) + } +} + const BASE_OPS: &[Op] = &[ Op::new( "splat", @@ -526,6 +535,31 @@ const BASE_OPS: &[Op] = &[ ), ]; +const MASK_REPRESENTATION_OPS: &[Op] = &[ + Op::new( + "splat", + OpKind::BaseTraitMethod, + OpSig::Splat, + "Create a SIMD mask with all lanes set from the given boolean value.", + ), + Op::new( + "load_array", + OpKind::AssociatedOnly, + OpSig::FromArray { + kind: RefKind::Value, + }, + "Create a SIMD mask from signed integer mask lanes.", + ), + Op::new( + "as_array", + OpKind::AssociatedOnly, + OpSig::AsArray { + kind: RefKind::Value, + }, + "Convert a SIMD mask to signed integer mask lanes.", + ), +]; + const FLOAT_OPS: &[Op] = &[ Op::new( "abs", @@ -582,35 +616,35 @@ const FLOAT_OPS: &[Op] = &[ OpKind::VecTraitMethod, OpSig::Compare, "Compare two vectors element-wise for equality.\n\n\ - Returns a mask where each element is all ones if the corresponding elements are equal, and all zeroes if not.", + Returns a mask where each logical lane is true if the corresponding elements are equal, and false if not.", ), Op::new( "simd_lt", OpKind::VecTraitMethod, OpSig::Compare, "Compare two vectors element-wise for less than.\n\n\ - Returns a mask where each element is all ones if `{arg0}` is less than `{arg1}`, and all zeroes if not.", + Returns a mask where each logical lane is true if `{arg0}` is less than `{arg1}`, and false if not.", ), Op::new( "simd_le", OpKind::VecTraitMethod, OpSig::Compare, "Compare two vectors element-wise for less than or equal.\n\n\ - Returns a mask where each element is all ones if `{arg0}` is less than or equal to `{arg1}`, and all zeroes if not.", + Returns a mask where each logical lane is true if `{arg0}` is less than or equal to `{arg1}`, and false if not.", ), Op::new( "simd_ge", OpKind::VecTraitMethod, OpSig::Compare, "Compare two vectors element-wise for greater than or equal.\n\n\ - Returns a mask where each element is all ones if `{arg0}` is greater than or equal to `{arg1}`, and all zeroes if not.", + Returns a mask where each logical lane is true if `{arg0}` is greater than or equal to `{arg1}`, and false if not.", ), Op::new( "simd_gt", OpKind::VecTraitMethod, OpSig::Compare, "Compare two vectors element-wise for greater than.\n\n\ - Returns a mask where each element is all ones if `{arg0}` is greater than `{arg1}`, and all zeroes if not.", + Returns a mask where each logical lane is true if `{arg0}` is greater than `{arg1}`, and false if not.", ), Op::new( "zip_low", @@ -758,7 +792,7 @@ const FLOAT_OPS: &[Op] = &[ OpKind::OwnTrait, OpSig::Select, "Select elements from {arg1} and {arg2} based on the mask operand {arg0}.\n\n\ - This operation's behavior is unspecified if each lane of {arg0} is not the all-zeroes or all-ones bit pattern. See the [`Select`] trait's documentation for more information.", + This operation's behavior is unspecified if {arg0} was constructed from signed integer lanes that are neither all-zeroes (integer value 0) nor all-ones (integer value -1). See the [`Select`] trait's documentation for more information.", ), ]; @@ -840,35 +874,35 @@ const INT_OPS: &[Op] = &[ OpKind::VecTraitMethod, OpSig::Compare, "Compare two vectors element-wise for equality.\n\n\ - Returns a mask where each element is all ones if the corresponding elements are equal, and all zeroes if not.", + Returns a mask where each logical lane is true if the corresponding elements are equal, and false if not.", ), Op::new( "simd_lt", OpKind::VecTraitMethod, OpSig::Compare, "Compare two vectors element-wise for less than.\n\n\ - Returns a mask where each element is all ones if `{arg0}` is less than `{arg1}`, and all zeroes if not.", + Returns a mask where each logical lane is true if `{arg0}` is less than `{arg1}`, and false if not.", ), Op::new( "simd_le", OpKind::VecTraitMethod, OpSig::Compare, "Compare two vectors element-wise for less than or equal.\n\n\ - Returns a mask where each element is all ones if `{arg0}` is less than or equal to `{arg1}`, and all zeroes if not.", + Returns a mask where each logical lane is true if `{arg0}` is less than or equal to `{arg1}`, and false if not.", ), Op::new( "simd_ge", OpKind::VecTraitMethod, OpSig::Compare, "Compare two vectors element-wise for greater than or equal.\n\n\ - Returns a mask where each element is all ones if `{arg0}` is greater than or equal to `{arg1}`, and all zeroes if not.", + Returns a mask where each logical lane is true if `{arg0}` is greater than or equal to `{arg1}`, and false if not.", ), Op::new( "simd_gt", OpKind::VecTraitMethod, OpSig::Compare, "Compare two vectors element-wise for greater than.\n\n\ - Returns a mask where each element is all ones if `{arg0}` is greater than `{arg1}`, and all zeroes if not.", + Returns a mask where each logical lane is true if `{arg0}` is greater than `{arg1}`, and false if not.", ), Op::new( "zip_low", @@ -935,7 +969,7 @@ const INT_OPS: &[Op] = &[ OpKind::OwnTrait, OpSig::Select, "Select elements from {arg1} and {arg2} based on the mask operand {arg0}.\n\n\ - This operation's behavior is unspecified if each lane of {arg0} is not the all-zeroes or all-ones bit pattern. See the [`Select`] trait's documentation for more information.", + This operation's behavior is unspecified if {arg0} was constructed from signed integer lanes that are neither all-zeroes (integer value 0) nor all-ones (integer value -1). See the [`Select`] trait's documentation for more information.", ), Op::new( "min", @@ -955,9 +989,10 @@ const INT_OPS: &[Op] = &[ // `concat!` macro. macro_rules! mask_reduce_blurb { () => { - "Behavior on mask elements that are not all zeroes or all ones is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\n\ - The behavior is also not guaranteed to be logically consistent if mask elements are not all zeroes or all ones. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\n\ - The [`select`](crate::Select::select) operation also has unspecified behavior for mask elements that are not all zeroes or all ones. That behavior may not match the behavior of this operation." + "Masks may be converted to and from signed integer lane arrays for compatibility with older APIs. For those conversions, false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1).\n\n\ + Behavior on masks constructed from any other integer bit pattern is unspecified. It may vary depending on architecture, feature level, the mask elements' width, the mask vector's width, or library version.\n\n\ + The behavior is also not guaranteed to be logically consistent for such non-canonical masks. `any_true` may not return the same result as `!all_false`, and `all_true` may not return the same result as `!any_false`.\n\n\ + The [`select`](crate::Select::select) operation also has unspecified behavior for non-canonical masks. That behavior may not match the behavior of this operation." } } @@ -991,14 +1026,14 @@ const MASK_OPS: &[Op] = &[ OpKind::OwnTrait, OpSig::Select, "Select elements from `{arg1}` and `{arg2}` based on the mask operand `{arg0}`.\n\n\ - This operation's behavior is unspecified if each lane of {arg0} is not the all-zeroes or all-ones bit pattern. See the [`Select`] trait's documentation for more information.", + This operation's behavior is unspecified if {arg0} was constructed from signed integer lanes that are neither all-zeroes (integer value 0) nor all-ones (integer value -1). See the [`Select`] trait's documentation for more information.", ), Op::new( "simd_eq", OpKind::VecTraitMethod, OpSig::Compare, "Compare two vectors element-wise for equality.\n\n\ - Returns a mask where each element is all ones if the corresponding elements are equal, and all zeroes if not.", + Returns a mask where each logical lane is true if the corresponding elements are equal, and false if not.", ), Op::new( "any_true", @@ -1008,7 +1043,7 @@ const MASK_OPS: &[Op] = &[ condition: true, }, concat!( - "Returns true if any elements in this mask are true (all ones).\n\n", + "Returns true if any logical lanes in this mask are true.\n\n", mask_reduce_blurb!() ), ), @@ -1020,7 +1055,7 @@ const MASK_OPS: &[Op] = &[ condition: true, }, concat!( - "Returns true if all elements in this mask are true (all ones).\n\n", + "Returns true if all logical lanes in this mask are true.\n\n", mask_reduce_blurb!() ), ), @@ -1032,7 +1067,7 @@ const MASK_OPS: &[Op] = &[ condition: false, }, concat!( - "Returns true if any elements in this mask are false (all zeroes).\n\n\ + "Returns true if any logical lanes in this mask are false.\n\n\ This is logically equivalent to `!all_true`, but may be faster.\n\n", mask_reduce_blurb!() ), @@ -1045,7 +1080,7 @@ const MASK_OPS: &[Op] = &[ condition: false, }, concat!( - "Returns true if all elements in this mask are false (all zeroes).\n\n\ + "Returns true if all logical lanes in this mask are false.\n\n\ This is logically equivalent to `!any_true`, but may be faster.\n\n", mask_reduce_blurb!() ), @@ -1171,7 +1206,15 @@ pub(crate) fn ops_for_type(ty: &VecType) -> Vec { ScalarType::Int | ScalarType::Unsigned => INT_OPS, ScalarType::Mask => MASK_OPS, }; - let mut ops: Vec = BASE_OPS.iter().chain(base.iter()).copied().collect(); + let representation_ops = match ty.scalar { + ScalarType::Mask => MASK_REPRESENTATION_OPS, + _ => BASE_OPS, + }; + let mut ops: Vec = representation_ops + .iter() + .chain(base.iter()) + .copied() + .collect(); if let Some(combined_ty) = ty.combine_operand() { ops.push(Op::new( diff --git a/fearless_simd_gen/src/types.rs b/fearless_simd_gen/src/types.rs index 6467206f..3b20e310 100644 --- a/fearless_simd_gen/src/types.rs +++ b/fearless_simd_gen/src/types.rs @@ -178,8 +178,8 @@ impl VecType { if self.scalar == ScalarType::Mask { let scalar_bits = self.scalar_bits; format!( - "A SIMD mask of {len} {scalar_bits}-bit elements.\n\n\ - When created from a comparison operation, and as it should be used in a [`Self::select`] operation, each element will be all ones if it's \"true\", and all zeroes if it's \"false\".", + "A SIMD mask of {len} logical lanes corresponding to {scalar_bits}-bit vector elements.\n\n\ + The storage representation of this type is intentionally opaque. For compatibility with existing APIs, it may be converted to and from signed integer lanes where false is encoded as all zeroes (integer value 0) and true is encoded as all ones (integer value -1).", ) } else { let scalar_name = self.scalar.rust_name(self.scalar_bits); diff --git a/fearless_simd_tests/tests/harness/lm_generated/mod_256.rs b/fearless_simd_tests/tests/harness/lm_generated/mod_256.rs index 2dc2c129..01363bac 100644 --- a/fearless_simd_tests/tests/harness/lm_generated/mod_256.rs +++ b/fearless_simd_tests/tests/harness/lm_generated/mod_256.rs @@ -83,35 +83,44 @@ fn copysign_f32x8(simd: S) { fn simd_eq_f32x8(simd: S) { let a = f32x8::from_slice(simd, &[4.0, 2.0, 1.0, 0.0, 5.0, 6.0, 7.0, 8.0]); let b = f32x8::from_slice(simd, &[4.0, 3.1, 1.0, 0.0, 5.0, 7.0, 7.0, 9.0]); - assert_eq!(*a.simd_eq(b), [-1, 0, -1, -1, -1, 0, -1, 0]); + assert_eq!( + <[i32; 8]>::from(a.simd_eq(b)), + [-1, 0, -1, -1, -1, 0, -1, 0] + ); } #[simd_test] fn simd_lt_f32x8(simd: S) { let a = f32x8::from_slice(simd, &[4.0, 3.0, 2.0, 1.0, 5.0, 6.0, 7.0, 8.0]); let b = f32x8::from_slice(simd, &[1.0, 2.0, 2.0, 4.0, 10.0, 5.0, 8.0, 7.0]); - assert_eq!(*a.simd_lt(b), [0, 0, 0, -1, -1, 0, -1, 0]); + assert_eq!(<[i32; 8]>::from(a.simd_lt(b)), [0, 0, 0, -1, -1, 0, -1, 0]); } #[simd_test] fn simd_le_f32x8(simd: S) { let a = f32x8::from_slice(simd, &[4.0, 3.0, 2.0, 1.0, 5.0, 6.0, 7.0, 8.0]); let b = f32x8::from_slice(simd, &[1.0, 2.0, 2.0, 4.0, 5.0, 6.0, 8.0, 7.0]); - assert_eq!(*a.simd_le(b), [0, 0, -1, -1, -1, -1, -1, 0]); + assert_eq!( + <[i32; 8]>::from(a.simd_le(b)), + [0, 0, -1, -1, -1, -1, -1, 0] + ); } #[simd_test] fn simd_ge_f32x8(simd: S) { let a = f32x8::from_slice(simd, &[4.0, 3.0, 2.0, 1.0, 5.0, 6.0, 7.0, 8.0]); let b = f32x8::from_slice(simd, &[1.0, 2.0, 2.0, 4.0, 5.0, 6.0, 8.0, 7.0]); - assert_eq!(*a.simd_ge(b), [-1, -1, -1, 0, -1, -1, 0, -1]); + assert_eq!( + <[i32; 8]>::from(a.simd_ge(b)), + [-1, -1, -1, 0, -1, -1, 0, -1] + ); } #[simd_test] fn simd_gt_f32x8(simd: S) { let a = f32x8::from_slice(simd, &[4.0, 3.0, 2.0, 1.0, 5.0, 6.0, 7.0, 8.0]); let b = f32x8::from_slice(simd, &[1.0, 2.0, 2.0, 4.0, 5.0, 6.0, 8.0, 7.0]); - assert_eq!(*a.simd_gt(b), [-1, -1, 0, 0, 0, 0, 0, -1]); + assert_eq!(<[i32; 8]>::from(a.simd_gt(b)), [-1, -1, 0, 0, 0, 0, 0, -1]); } #[simd_test] @@ -500,7 +509,7 @@ fn simd_eq_i8x32(simd: S) { ], ); assert_eq!( - *a.simd_eq(b), + <[i8; 32]>::from(a.simd_eq(b)), [ -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0 @@ -525,7 +534,7 @@ fn simd_lt_i8x32(simd: S) { ], ); assert_eq!( - *a.simd_lt(b), + <[i8; 32]>::from(a.simd_lt(b)), [ -1, 0, 0, -1, -1, -1, -1, -1, 0, -1, 0, -1, 0, -1, 0, -1, -1, 0, 0, -1, -1, -1, -1, -1, 0, -1, 0, -1, 0, -1, 0, -1 @@ -550,7 +559,7 @@ fn simd_gt_i8x32(simd: S) { ], ); assert_eq!( - *a.simd_gt(b), + <[i8; 32]>::from(a.simd_gt(b)), [ -1, 0, 0, -1, -1, -1, -1, -1, 0, -1, 0, -1, 0, -1, 0, -1, -1, 0, 0, -1, -1, -1, -1, -1, 0, -1, 0, -1, 0, -1, 0, -1 @@ -822,7 +831,7 @@ fn simd_eq_u8x32(simd: S) { ], ); assert_eq!( - *a.simd_eq(b), + <[i8; 32]>::from(a.simd_eq(b)), [ -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0 @@ -847,7 +856,7 @@ fn simd_lt_u8x32(simd: S) { ], ); assert_eq!( - *a.simd_lt(b), + <[i8; 32]>::from(a.simd_lt(b)), [ -1, 0, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, -1, 0, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1 @@ -872,7 +881,7 @@ fn simd_gt_u8x32(simd: S) { ], ); assert_eq!( - *a.simd_gt(b), + <[i8; 32]>::from(a.simd_gt(b)), [ -1, 0, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, -1, 0, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1 @@ -945,7 +954,7 @@ fn simd_eq_i16x16(simd: S) { let a = i16x16::from_slice(simd, &[1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]); let b = i16x16::from_slice(simd, &[1, 0, 3, 0, 5, 0, 7, 0, 1, 0, 3, 0, 5, 0, 7, 0]); assert_eq!( - *a.simd_eq(b), + <[i16; 16]>::from(a.simd_eq(b)), [-1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0] ); } @@ -961,7 +970,7 @@ fn simd_lt_i16x16(simd: S) { &[2, 2, 2, 5, 0, 0, 0, 0, 5, 25, 25, 45, 45, 65, 65, 85], ); assert_eq!( - *a.simd_lt(b), + <[i16; 16]>::from(a.simd_lt(b)), [-1, 0, 0, -1, -1, -1, -1, -1, 0, -1, 0, -1, 0, -1, 0, -1] ); } @@ -977,7 +986,7 @@ fn simd_gt_i16x16(simd: S) { &[1, 2, 3, 4, -1, -2, -3, -4, 10, 20, 30, 40, 50, 60, 70, 80], ); assert_eq!( - *a.simd_gt(b), + <[i16; 16]>::from(a.simd_gt(b)), [-1, 0, 0, -1, -1, -1, -1, -1, 0, -1, 0, -1, 0, -1, 0, -1] ); } @@ -1075,7 +1084,7 @@ fn simd_eq_u16x16(simd: S) { let a = u16x16::from_slice(simd, &[1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]); let b = u16x16::from_slice(simd, &[1, 0, 3, 0, 5, 0, 7, 0, 1, 0, 3, 0, 5, 0, 7, 0]); assert_eq!( - *a.simd_eq(b), + <[i16; 16]>::from(a.simd_eq(b)), [-1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0] ); } @@ -1091,7 +1100,7 @@ fn simd_lt_u16x16(simd: S) { &[2, 2, 2, 5, 4, 7, 6, 9, 5, 25, 25, 45, 45, 65, 65, 85], ); assert_eq!( - *a.simd_lt(b), + <[i16; 16]>::from(a.simd_lt(b)), [-1, 0, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1] ); } @@ -1107,7 +1116,7 @@ fn simd_gt_u16x16(simd: S) { &[1, 2, 3, 4, 5, 6, 7, 8, 10, 20, 30, 40, 50, 60, 70, 80], ); assert_eq!( - *a.simd_gt(b), + <[i16; 16]>::from(a.simd_gt(b)), [-1, 0, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1] ); } @@ -1184,21 +1193,27 @@ fn neg_i32x8(simd: S) { fn simd_eq_i32x8(simd: S) { let a = i32x8::from_slice(simd, &[1, 2, 3, 4, 5, 6, 7, 8]); let b = i32x8::from_slice(simd, &[1, 0, 3, 0, 5, 0, 7, 0]); - assert_eq!(*a.simd_eq(b), [-1, 0, -1, 0, -1, 0, -1, 0]); + assert_eq!(<[i32; 8]>::from(a.simd_eq(b)), [-1, 0, -1, 0, -1, 0, -1, 0]); } #[simd_test] fn simd_lt_i32x8(simd: S) { let a = i32x8::from_slice(simd, &[1, 2, 3, 4, -1, -2, -3, -4]); let b = i32x8::from_slice(simd, &[2, 2, 2, 5, 0, 0, 0, 0]); - assert_eq!(*a.simd_lt(b), [-1, 0, 0, -1, -1, -1, -1, -1]); + assert_eq!( + <[i32; 8]>::from(a.simd_lt(b)), + [-1, 0, 0, -1, -1, -1, -1, -1] + ); } #[simd_test] fn simd_gt_i32x8(simd: S) { let a = i32x8::from_slice(simd, &[2, 2, 2, 5, 0, 0, 0, 0]); let b = i32x8::from_slice(simd, &[1, 2, 3, 4, -1, -2, -3, -4]); - assert_eq!(*a.simd_gt(b), [-1, 0, 0, -1, -1, -1, -1, -1]); + assert_eq!( + <[i32; 8]>::from(a.simd_gt(b)), + [-1, 0, 0, -1, -1, -1, -1, -1] + ); } #[simd_test] @@ -1244,21 +1259,21 @@ fn sub_u32x8(simd: S) { fn simd_eq_u32x8(simd: S) { let a = u32x8::from_slice(simd, &[1, 2, 3, 4, 5, 6, 7, 8]); let b = u32x8::from_slice(simd, &[1, 0, 3, 0, 5, 0, 7, 0]); - assert_eq!(*a.simd_eq(b), [-1, 0, -1, 0, -1, 0, -1, 0]); + assert_eq!(<[i32; 8]>::from(a.simd_eq(b)), [-1, 0, -1, 0, -1, 0, -1, 0]); } #[simd_test] fn simd_lt_u32x8(simd: S) { let a = u32x8::from_slice(simd, &[1, 2, 3, 4, 5, 6, 7, 8]); let b = u32x8::from_slice(simd, &[2, 2, 2, 5, 4, 7, 6, 9]); - assert_eq!(*a.simd_lt(b), [-1, 0, 0, -1, 0, -1, 0, -1]); + assert_eq!(<[i32; 8]>::from(a.simd_lt(b)), [-1, 0, 0, -1, 0, -1, 0, -1]); } #[simd_test] fn simd_gt_u32x8(simd: S) { let a = u32x8::from_slice(simd, &[2, 2, 2, 5, 4, 7, 6, 9]); let b = u32x8::from_slice(simd, &[1, 2, 3, 4, 5, 6, 7, 8]); - assert_eq!(*a.simd_gt(b), [-1, 0, 0, -1, 0, -1, 0, -1]); + assert_eq!(<[i32; 8]>::from(a.simd_gt(b)), [-1, 0, 0, -1, 0, -1, 0, -1]); } #[simd_test] diff --git a/fearless_simd_tests/tests/harness/lm_generated/mod_512.rs b/fearless_simd_tests/tests/harness/lm_generated/mod_512.rs index 6409c6e0..5f8abbcc 100644 --- a/fearless_simd_tests/tests/harness/lm_generated/mod_512.rs +++ b/fearless_simd_tests/tests/harness/lm_generated/mod_512.rs @@ -244,7 +244,7 @@ fn simd_eq_f32x16(simd: S) { ], ); assert_eq!( - *a.simd_eq(b), + <[i32; 16]>::from(a.simd_eq(b)), [-1, 0, -1, -1, -1, 0, -1, 0, -1, 0, -1, -1, -1, 0, -1, 0] ); } @@ -264,7 +264,7 @@ fn simd_lt_f32x16(simd: S) { ], ); assert_eq!( - *a.simd_lt(b), + <[i32; 16]>::from(a.simd_lt(b)), [0, 0, 0, -1, -1, 0, -1, 0, 0, 0, 0, -1, -1, 0, -1, 0] ); } @@ -284,7 +284,7 @@ fn simd_le_f32x16(simd: S) { ], ); assert_eq!( - *a.simd_le(b), + <[i32; 16]>::from(a.simd_le(b)), [0, 0, -1, -1, -1, -1, -1, 0, 0, 0, -1, -1, -1, -1, -1, 0] ); } @@ -304,7 +304,7 @@ fn simd_ge_f32x16(simd: S) { ], ); assert_eq!( - *a.simd_ge(b), + <[i32; 16]>::from(a.simd_ge(b)), [-1, -1, -1, 0, -1, -1, 0, -1, -1, -1, -1, 0, -1, -1, 0, -1] ); } @@ -324,7 +324,7 @@ fn simd_gt_f32x16(simd: S) { ], ); assert_eq!( - *a.simd_gt(b), + <[i32; 16]>::from(a.simd_gt(b)), [-1, -1, 0, 0, 0, 0, 0, -1, -1, -1, 0, 0, 0, 0, 0, -1] ); } @@ -729,7 +729,7 @@ fn simd_eq_i8x64(simd: S) { ], ); assert_eq!( - *a.simd_eq(b), + <[i8; 64]>::from(a.simd_eq(b)), [ -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, @@ -757,7 +757,7 @@ fn simd_lt_i8x64(simd: S) { ], ); assert_eq!( - *a.simd_lt(b), + <[i8; 64]>::from(a.simd_lt(b)), [ -1, 0, 0, -1, -1, -1, -1, -1, 0, -1, 0, -1, 0, -1, 0, -1, -1, 0, 0, -1, -1, -1, -1, -1, 0, -1, 0, -1, 0, -1, 0, -1, -1, 0, 0, -1, -1, -1, -1, -1, 0, -1, 0, -1, 0, -1, 0, -1, @@ -785,7 +785,7 @@ fn simd_gt_i8x64(simd: S) { ], ); assert_eq!( - *a.simd_gt(b), + <[i8; 64]>::from(a.simd_gt(b)), [ -1, 0, 0, -1, -1, -1, -1, -1, 0, -1, 0, -1, 0, -1, 0, -1, -1, 0, 0, -1, -1, -1, -1, -1, 0, -1, 0, -1, 0, -1, 0, -1, -1, 0, 0, -1, -1, -1, -1, -1, 0, -1, 0, -1, 0, -1, 0, -1, @@ -1099,7 +1099,7 @@ fn simd_eq_u8x64(simd: S) { ], ); assert_eq!( - *a.simd_eq(b), + <[i8; 64]>::from(a.simd_eq(b)), [ -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, @@ -1127,7 +1127,7 @@ fn simd_lt_u8x64(simd: S) { ], ); assert_eq!( - *a.simd_lt(b), + <[i8; 64]>::from(a.simd_lt(b)), [ -1, 0, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, -1, 0, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, -1, 0, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, -1, 0, @@ -1155,7 +1155,7 @@ fn simd_gt_u8x64(simd: S) { ], ); assert_eq!( - *a.simd_gt(b), + <[i8; 64]>::from(a.simd_gt(b)), [ -1, 0, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, -1, 0, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, -1, 0, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, -1, 0, @@ -1253,7 +1253,7 @@ fn simd_eq_i16x32(simd: S) { ], ); assert_eq!( - *a.simd_eq(b), + <[i16; 32]>::from(a.simd_eq(b)), [ -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0 @@ -1278,7 +1278,7 @@ fn simd_lt_i16x32(simd: S) { ], ); assert_eq!( - *a.simd_lt(b), + <[i16; 32]>::from(a.simd_lt(b)), [ -1, 0, 0, -1, -1, -1, -1, -1, 0, -1, 0, -1, 0, -1, 0, -1, -1, 0, 0, -1, -1, -1, -1, -1, 0, -1, 0, -1, 0, -1, 0, -1 @@ -1303,7 +1303,7 @@ fn simd_gt_i16x32(simd: S) { ], ); assert_eq!( - *a.simd_gt(b), + <[i16; 32]>::from(a.simd_gt(b)), [ -1, 0, 0, -1, -1, -1, -1, -1, 0, -1, 0, -1, 0, -1, 0, -1, -1, 0, 0, -1, -1, -1, -1, -1, 0, -1, 0, -1, 0, -1, 0, -1 @@ -1432,7 +1432,7 @@ fn simd_eq_u16x32(simd: S) { ], ); assert_eq!( - *a.simd_eq(b), + <[i16; 32]>::from(a.simd_eq(b)), [ -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0 @@ -1457,7 +1457,7 @@ fn simd_lt_u16x32(simd: S) { ], ); assert_eq!( - *a.simd_lt(b), + <[i16; 32]>::from(a.simd_lt(b)), [ -1, 0, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, -1, 0, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1 @@ -1482,7 +1482,7 @@ fn simd_gt_u16x32(simd: S) { ], ); assert_eq!( - *a.simd_gt(b), + <[i16; 32]>::from(a.simd_gt(b)), [ -1, 0, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, -1, 0, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1 @@ -1605,7 +1605,7 @@ fn simd_eq_i32x16(simd: S) { let a = i32x16::from_slice(simd, &[1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]); let b = i32x16::from_slice(simd, &[1, 0, 3, 0, 5, 0, 7, 0, 1, 0, 3, 0, 5, 0, 7, 0]); assert_eq!( - *a.simd_eq(b), + <[i32; 16]>::from(a.simd_eq(b)), [-1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0] ); } @@ -1618,7 +1618,7 @@ fn simd_lt_i32x16(simd: S) { ); let b = i32x16::from_slice(simd, &[2, 2, 2, 5, 0, 0, 0, 0, 2, 2, 2, 5, 0, 0, 0, 0]); assert_eq!( - *a.simd_lt(b), + <[i32; 16]>::from(a.simd_lt(b)), [-1, 0, 0, -1, -1, -1, -1, -1, -1, 0, 0, -1, -1, -1, -1, -1] ); } @@ -1631,7 +1631,7 @@ fn simd_gt_i32x16(simd: S) { &[1, 2, 3, 4, -1, -2, -3, -4, 1, 2, 3, 4, -1, -2, -3, -4], ); assert_eq!( - *a.simd_gt(b), + <[i32; 16]>::from(a.simd_gt(b)), [-1, 0, 0, -1, -1, -1, -1, -1, -1, 0, 0, -1, -1, -1, -1, -1] ); } @@ -1742,7 +1742,7 @@ fn simd_eq_u32x16(simd: S) { let a = u32x16::from_slice(simd, &[1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]); let b = u32x16::from_slice(simd, &[1, 0, 3, 0, 5, 0, 7, 0, 1, 0, 3, 0, 5, 0, 7, 0]); assert_eq!( - *a.simd_eq(b), + <[i32; 16]>::from(a.simd_eq(b)), [-1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0] ); } @@ -1752,7 +1752,7 @@ fn simd_lt_u32x16(simd: S) { let a = u32x16::from_slice(simd, &[1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]); let b = u32x16::from_slice(simd, &[2, 2, 2, 5, 4, 7, 6, 9, 2, 2, 2, 5, 4, 7, 6, 9]); assert_eq!( - *a.simd_lt(b), + <[i32; 16]>::from(a.simd_lt(b)), [-1, 0, 0, -1, 0, -1, 0, -1, -1, 0, 0, -1, 0, -1, 0, -1] ); } @@ -1762,7 +1762,7 @@ fn simd_gt_u32x16(simd: S) { let a = u32x16::from_slice(simd, &[2, 2, 2, 5, 4, 7, 6, 9, 2, 2, 2, 5, 4, 7, 6, 9]); let b = u32x16::from_slice(simd, &[1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]); assert_eq!( - *a.simd_gt(b), + <[i32; 16]>::from(a.simd_gt(b)), [-1, 0, 0, -1, 0, -1, 0, -1, -1, 0, 0, -1, 0, -1, 0, -1] ); } @@ -1894,35 +1894,44 @@ fn copysign_f64x8(simd: S) { fn simd_eq_f64x8(simd: S) { let a = f64x8::from_slice(simd, &[4.0, 2.0, 1.0, 0.0, 5.0, 6.0, 7.0, 8.0]); let b = f64x8::from_slice(simd, &[4.0, 3.1, 1.0, 0.0, 5.0, 7.0, 7.0, 9.0]); - assert_eq!(*a.simd_eq(b), [-1, 0, -1, -1, -1, 0, -1, 0]); + assert_eq!( + <[i64; 8]>::from(a.simd_eq(b)), + [-1, 0, -1, -1, -1, 0, -1, 0] + ); } #[simd_test] fn simd_lt_f64x8(simd: S) { let a = f64x8::from_slice(simd, &[4.0, 3.0, 2.0, 1.0, 5.0, 6.0, 7.0, 8.0]); let b = f64x8::from_slice(simd, &[1.0, 2.0, 2.0, 4.0, 10.0, 5.0, 8.0, 7.0]); - assert_eq!(*a.simd_lt(b), [0, 0, 0, -1, -1, 0, -1, 0]); + assert_eq!(<[i64; 8]>::from(a.simd_lt(b)), [0, 0, 0, -1, -1, 0, -1, 0]); } #[simd_test] fn simd_le_f64x8(simd: S) { let a = f64x8::from_slice(simd, &[4.0, 3.0, 2.0, 1.0, 5.0, 6.0, 7.0, 8.0]); let b = f64x8::from_slice(simd, &[1.0, 2.0, 2.0, 4.0, 5.0, 6.0, 8.0, 7.0]); - assert_eq!(*a.simd_le(b), [0, 0, -1, -1, -1, -1, -1, 0]); + assert_eq!( + <[i64; 8]>::from(a.simd_le(b)), + [0, 0, -1, -1, -1, -1, -1, 0] + ); } #[simd_test] fn simd_ge_f64x8(simd: S) { let a = f64x8::from_slice(simd, &[4.0, 3.0, 2.0, 1.0, 5.0, 6.0, 7.0, 8.0]); let b = f64x8::from_slice(simd, &[1.0, 2.0, 2.0, 4.0, 5.0, 6.0, 8.0, 7.0]); - assert_eq!(*a.simd_ge(b), [-1, -1, -1, 0, -1, -1, 0, -1]); + assert_eq!( + <[i64; 8]>::from(a.simd_ge(b)), + [-1, -1, -1, 0, -1, -1, 0, -1] + ); } #[simd_test] fn simd_gt_f64x8(simd: S) { let a = f64x8::from_slice(simd, &[4.0, 3.0, 2.0, 1.0, 5.0, 6.0, 7.0, 8.0]); let b = f64x8::from_slice(simd, &[1.0, 2.0, 2.0, 4.0, 5.0, 6.0, 8.0, 7.0]); - assert_eq!(*a.simd_gt(b), [-1, -1, 0, 0, 0, 0, 0, -1]); + assert_eq!(<[i64; 8]>::from(a.simd_gt(b)), [-1, -1, 0, 0, 0, 0, 0, -1]); } #[simd_test] diff --git a/fearless_simd_tests/tests/harness/mod.rs b/fearless_simd_tests/tests/harness/mod.rs index 777c7320..d5867bb9 100644 --- a/fearless_simd_tests/tests/harness/mod.rs +++ b/fearless_simd_tests/tests/harness/mod.rs @@ -22,6 +22,24 @@ fn splat_f32x4(simd: S) { assert_eq!(*a, [4.2, 4.2, 4.2, 4.2]); } +#[simd_test] +fn mask_trait_splat_mask32x4(simd: S) { + let t = mask32x4::splat(simd, true); + assert_eq!(<[i32; 4]>::from(t), [-1; 4]); + + let f = mask32x4::splat(simd, false); + assert_eq!(<[i32; 4]>::from(f), [0; 4]); +} + +#[simd_test] +fn splat_native_mask(simd: S) { + let all_true = S::mask32s::splat(simd, true); + assert!(all_true.all_true()); + + let all_false = S::mask32s::splat(simd, false); + assert!(all_false.all_false()); +} + #[simd_test] fn abs_f32x4(simd: S) { let a = f32x4::from_slice(simd, &[-1.0, 2.0, -3.0, 4.0]); @@ -72,35 +90,35 @@ fn copysign_f32x4(simd: S) { fn simd_eq_f32x4(simd: S) { let a = f32x4::from_slice(simd, &[4.0, 2.0, 1.0, 0.0]); let b = f32x4::from_slice(simd, &[4.0, 3.1, 1.0, 0.0]); - assert_eq!(*a.simd_eq(b), [-1, 0, -1, -1]); + assert_eq!(<[i32; 4]>::from(a.simd_eq(b)), [-1, 0, -1, -1]); } #[simd_test] fn simd_lt_f32x4(simd: S) { let a = f32x4::from_slice(simd, &[4.0, 3.0, 2.0, 1.0]); let b = f32x4::from_slice(simd, &[1.0, 2.0, 2.0, 4.0]); - assert_eq!(*a.simd_lt(b), [0, 0, 0, -1]); + assert_eq!(<[i32; 4]>::from(a.simd_lt(b)), [0, 0, 0, -1]); } #[simd_test] fn simd_le_f32x4(simd: S) { let a = f32x4::from_slice(simd, &[4.0, 3.0, 2.0, 1.0]); let b = f32x4::from_slice(simd, &[1.0, 2.0, 2.0, 4.0]); - assert_eq!(*a.simd_le(b), [0, 0, -1, -1]); + assert_eq!(<[i32; 4]>::from(a.simd_le(b)), [0, 0, -1, -1]); } #[simd_test] fn simd_ge_f32x4(simd: S) { let a = f32x4::from_slice(simd, &[4.0, 3.0, 2.0, 1.0]); let b = f32x4::from_slice(simd, &[1.0, 2.0, 2.0, 4.0]); - assert_eq!(*a.simd_ge(b), [-1, -1, -1, 0]); + assert_eq!(<[i32; 4]>::from(a.simd_ge(b)), [-1, -1, -1, 0]); } #[simd_test] fn simd_gt_f32x4(simd: S) { let a = f32x4::from_slice(simd, &[4.0, 3.0, 2.0, 1.0]); let b = f32x4::from_slice(simd, &[1.0, 2.0, 2.0, 4.0]); - assert_eq!(*a.simd_gt(b), [-1, -1, 0, 0]); + assert_eq!(<[i32; 4]>::from(a.simd_gt(b)), [-1, -1, 0, 0]); } #[simd_test] @@ -422,7 +440,7 @@ fn simd_eq_i8x16(simd: S) { let a = i8x16::from_slice(simd, &[1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]); let b = i8x16::from_slice(simd, &[1, 0, 3, 0, 5, 0, 7, 0, 1, 0, 3, 0, 5, 0, 7, 0]); assert_eq!( - *a.simd_eq(b), + <[i8; 16]>::from(a.simd_eq(b)), [-1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0] ); } @@ -438,7 +456,7 @@ fn simd_lt_i8x16(simd: S) { &[2, 2, 2, 5, 0, 0, 0, 0, 5, 25, 25, 45, 45, 65, 65, 85], ); assert_eq!( - *a.simd_lt(b), + <[i8; 16]>::from(a.simd_lt(b)), [-1, 0, 0, -1, -1, -1, -1, -1, 0, -1, 0, -1, 0, -1, 0, -1] ); } @@ -454,7 +472,7 @@ fn simd_gt_i8x16(simd: S) { &[1, 2, 3, 4, -1, -2, -3, -4, 10, 20, 30, 40, 50, 60, 70, 80], ); assert_eq!( - *a.simd_gt(b), + <[i8; 16]>::from(a.simd_gt(b)), [-1, 0, 0, -1, -1, -1, -1, -1, 0, -1, 0, -1, 0, -1, 0, -1] ); } @@ -675,28 +693,37 @@ fn and_mask8x16(simd: S) { 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, ], ); - assert_eq!(*(a & b), [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0]); + assert_eq!( + <[i8; 16]>::from(a & b), + [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0] + ); } #[simd_test] fn or_mask8x16(simd: S) { let a = mask8x16::from_slice(simd, &[0, 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7, 8]); let b = mask8x16::from_slice(simd, &[1, 1, 1, 1, 2, 3, 4, 5, 0, 0, 0, 0, 0, 0, 0, 0]); - assert_eq!(*(a | b), [1, 1, 3, 3, 6, 7, 6, 7, 1, 2, 3, 4, 5, 6, 7, 8]); + assert_eq!( + <[i8; 16]>::from(a | b), + [1, 1, 3, 3, 6, 7, 6, 7, 1, 2, 3, 4, 5, 6, 7, 8] + ); } #[simd_test] fn xor_mask8x16(simd: S) { let a = mask8x16::from_slice(simd, &[0, 1, 2, 3, 4, 5, 6, 7, 1, 1, 1, 1, 0, 0, 0, 0]); let b = mask8x16::from_slice(simd, &[1, 1, 0, 0, 5, 4, 7, 6, 1, 0, 1, 0, 1, 0, 1, 0]); - assert_eq!(*(a ^ b), [1, 0, 2, 3, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0]); + assert_eq!( + <[i8; 16]>::from(a ^ b), + [1, 0, 2, 3, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0] + ); } #[simd_test] fn not_mask8x16(simd: S) { let a = mask8x16::from_slice(simd, &[0, 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7, 8]); assert_eq!( - *(!a), + <[i8; 16]>::from(!a), [ -1, -2, -3, -4, -5, -6, -7, -8, -2, -3, -4, -5, -6, -7, -8, -9 ] @@ -2413,21 +2440,27 @@ fn neg_i16x8(simd: S) { fn simd_eq_i16x8(simd: S) { let a = i16x8::from_slice(simd, &[1, 2, 3, 4, 5, 6, 7, 8]); let b = i16x8::from_slice(simd, &[1, 0, 3, 0, 5, 0, 7, 0]); - assert_eq!(*a.simd_eq(b), [-1, 0, -1, 0, -1, 0, -1, 0]); + assert_eq!(<[i16; 8]>::from(a.simd_eq(b)), [-1, 0, -1, 0, -1, 0, -1, 0]); } #[simd_test] fn simd_lt_i16x8(simd: S) { let a = i16x8::from_slice(simd, &[1, 2, 3, 4, -1, -2, -3, -4]); let b = i16x8::from_slice(simd, &[2, 2, 2, 5, 0, 0, 0, 0]); - assert_eq!(*a.simd_lt(b), [-1, 0, 0, -1, -1, -1, -1, -1]); + assert_eq!( + <[i16; 8]>::from(a.simd_lt(b)), + [-1, 0, 0, -1, -1, -1, -1, -1] + ); } #[simd_test] fn simd_gt_i16x8(simd: S) { let a = i16x8::from_slice(simd, &[2, 2, 2, 5, 0, 0, 0, 0]); let b = i16x8::from_slice(simd, &[1, 2, 3, 4, -1, -2, -3, -4]); - assert_eq!(*a.simd_gt(b), [-1, 0, 0, -1, -1, -1, -1, -1]); + assert_eq!( + <[i16; 8]>::from(a.simd_gt(b)), + [-1, 0, 0, -1, -1, -1, -1, -1] + ); } #[simd_test] @@ -2472,21 +2505,21 @@ fn sub_u16x8(simd: S) { fn simd_eq_u16x8(simd: S) { let a = u16x8::from_slice(simd, &[1, 2, 32768, 40000, 65535, 6, 7, 8]); let b = u16x8::from_slice(simd, &[1, 0, 32768, 0, 65535, 0, 7, 0]); - assert_eq!(*a.simd_eq(b), [-1, 0, -1, 0, -1, 0, -1, 0]); + assert_eq!(<[i16; 8]>::from(a.simd_eq(b)), [-1, 0, -1, 0, -1, 0, -1, 0]); } #[simd_test] fn simd_lt_u16x8(simd: S) { let a = u16x8::from_slice(simd, &[1, 2, 3, 4, 100, 200, 300, 400]); let b = u16x8::from_slice(simd, &[2, 2, 2, 5, 40000, 150, 50000, 350]); - assert_eq!(*a.simd_lt(b), [-1, 0, 0, -1, -1, 0, -1, 0]); + assert_eq!(<[i16; 8]>::from(a.simd_lt(b)), [-1, 0, 0, -1, -1, 0, -1, 0]); } #[simd_test] fn simd_gt_u16x8(simd: S) { let a = u16x8::from_slice(simd, &[2, 2, 2, 5, 40000, 150, 50000, 350]); let b = u16x8::from_slice(simd, &[1, 2, 3, 4, 100, 200, 300, 400]); - assert_eq!(*a.simd_gt(b), [-1, 0, 0, -1, -1, 0, -1, 0]); + assert_eq!(<[i16; 8]>::from(a.simd_gt(b)), [-1, 0, 0, -1, -1, 0, -1, 0]); } #[simd_test] @@ -2531,21 +2564,21 @@ fn sub_i32x4(simd: S) { fn simd_eq_i32x4(simd: S) { let a = i32x4::from_slice(simd, &[1, 2, 3, 4]); let b = i32x4::from_slice(simd, &[1, 0, 3, 0]); - assert_eq!(*a.simd_eq(b), [-1, 0, -1, 0]); + assert_eq!(<[i32; 4]>::from(a.simd_eq(b)), [-1, 0, -1, 0]); } #[simd_test] fn simd_lt_i32x4(simd: S) { let a = i32x4::from_slice(simd, &[1, 2, -3, -4]); let b = i32x4::from_slice(simd, &[2, 2, 0, 0]); - assert_eq!(*a.simd_lt(b), [-1, 0, -1, -1]); + assert_eq!(<[i32; 4]>::from(a.simd_lt(b)), [-1, 0, -1, -1]); } #[simd_test] fn simd_gt_i32x4(simd: S) { let a = i32x4::from_slice(simd, &[2, 2, 0, 0]); let b = i32x4::from_slice(simd, &[1, 2, -3, -4]); - assert_eq!(*a.simd_gt(b), [-1, 0, -1, -1]); + assert_eq!(<[i32; 4]>::from(a.simd_gt(b)), [-1, 0, -1, -1]); } #[simd_test] @@ -2587,21 +2620,21 @@ fn sub_u32x4(simd: S) { fn simd_eq_u32x4(simd: S) { let a = u32x4::from_slice(simd, &[1, 2, 2147483648, 4294967295]); let b = u32x4::from_slice(simd, &[1, 0, 2147483648, 0]); - assert_eq!(*a.simd_eq(b), [-1, 0, -1, 0]); + assert_eq!(<[i32; 4]>::from(a.simd_eq(b)), [-1, 0, -1, 0]); } #[simd_test] fn simd_lt_u32x4(simd: S) { let a = u32x4::from_slice(simd, &[1, 2, 100, 200]); let b = u32x4::from_slice(simd, &[2, 2, 3000000000, 150]); - assert_eq!(*a.simd_lt(b), [-1, 0, -1, 0]); + assert_eq!(<[i32; 4]>::from(a.simd_lt(b)), [-1, 0, -1, 0]); } #[simd_test] fn simd_gt_u32x4(simd: S) { let a = u32x4::from_slice(simd, &[2, 2, 3000000000, 150]); let b = u32x4::from_slice(simd, &[1, 2, 100, 200]); - assert_eq!(*a.simd_gt(b), [-1, 0, -1, 0]); + assert_eq!(<[i32; 4]>::from(a.simd_gt(b)), [-1, 0, -1, 0]); } #[simd_test] @@ -2922,7 +2955,7 @@ fn select_mask8x16(simd: S) { ); let result: mask8x16<_> = mask.select(b, c); assert_eq!( - *result, + <[i8; 16]>::from(result), [-1, 0, 0, -1, -1, 0, 0, -1, -1, 0, 0, -1, -1, 0, 0, -1] ); } @@ -2952,7 +2985,7 @@ fn select_mask16x8(simd: S) { let b = mask16x8::from_slice(simd, &[-1, 0, -1, 0, -1, 0, -1, 0]); let c = mask16x8::from_slice(simd, &[0, -1, 0, -1, 0, -1, 0, -1]); let result: mask16x8<_> = mask.select(b, c); - assert_eq!(*result, [-1, 0, 0, -1, -1, 0, 0, -1]); + assert_eq!(<[i16; 8]>::from(result), [-1, 0, 0, -1, -1, 0, 0, -1]); } #[simd_test] @@ -3025,7 +3058,7 @@ fn select_mask32x4(simd: S) { let b = mask32x4::from_slice(simd, &[-1, -1, 0, 0]); let c = mask32x4::from_slice(simd, &[0, 0, -1, -1]); let result: mask32x4<_> = mask.select(b, c); - assert_eq!(*result, [-1, 0, 0, -1]); + assert_eq!(<[i32; 4]>::from(result), [-1, 0, 0, -1]); } #[simd_test] @@ -3347,7 +3380,7 @@ fn simd_eq_u8x16(simd: S) { &[1, 0, 128, 0, 255, 0, 7, 0, 1, 0, 128, 0, 255, 0, 7, 0], ); assert_eq!( - *a.simd_eq(b), + <[i8; 16]>::from(a.simd_eq(b)), [-1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0] ); } @@ -3362,7 +3395,10 @@ fn simd_ge_u8x16(simd: S) { ); let mask = vals.simd_ge(u8x16::splat(simd, 128)); - assert_eq!(*mask, [0, 0, 0, 0, -1, -1, 0, -1, -1, 0, 0, 0, 0, 0, 0, 0]); + assert_eq!( + <[i8; 16]>::from(mask), + [0, 0, 0, 0, -1, -1, 0, -1, -1, 0, 0, 0, 0, 0, 0, 0] + ); } #[simd_test] @@ -3375,7 +3411,10 @@ fn simd_gt_u8x16(simd: S) { ); let mask = vals.simd_gt(u8x16::splat(simd, 128)); - assert_eq!(*mask, [0, 0, 0, 0, -1, -1, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0]); + assert_eq!( + <[i8; 16]>::from(mask), + [0, 0, 0, 0, -1, -1, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0] + ); } #[simd_test] @@ -3389,7 +3428,7 @@ fn simd_le_u8x16(simd: S) { let mask = vals.simd_le(u8x16::splat(simd, 128)); assert_eq!( - *mask, + <[i8; 16]>::from(mask), [-1, -1, -1, -1, 0, 0, -1, -1, 0, -1, -1, -1, -1, -1, -1, -1] ); } @@ -3405,7 +3444,7 @@ fn simd_lt_u8x16(simd: S) { let mask = vals.simd_lt(u8x16::splat(simd, 128)); assert_eq!( - *mask, + <[i8; 16]>::from(mask), [-1, -1, -1, -1, 0, 0, -1, 0, 0, -1, -1, -1, -1, -1, -1, -1] ); } @@ -3419,7 +3458,7 @@ fn simd_ge_i8x16(simd: S) { let mask = vals.simd_ge(i8x16::splat(simd, -1)); assert_eq!( - *mask, + <[i8; 16]>::from(mask), [-1, 0, 0, -1, -1, -1, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1] ); } @@ -3724,6 +3763,14 @@ fn store_slice_f32x4(simd: S) { assert_eq!(dest, [1.0, 2.0, 3.0, 4.0]); } +#[simd_test] +fn store_slice_mask32x4(simd: S) { + let mask = mask32x4::from_slice(simd, &[-1, 0, -1, 0]); + let mut dest = [0_i32; 4]; + mask.store_slice(&mut dest); + assert_eq!(dest, [-1, 0, -1, 0]); +} + #[simd_test] fn slide_f32x4(simd: S) { let a = f32x4::from_slice(simd, &[1.0, 2.0, 3.0, 4.0]); @@ -4384,322 +4431,6 @@ fn slide_within_blocks_u32x16(simd: S) { assert_eq!(a.slide_within_blocks::<4>(b).as_slice(), b.as_slice()); } -#[simd_test] -fn slide_mask8x16(simd: S) { - let a: Vec = (1_i8..=16).collect(); - let b: Vec = (17_i8..=32).collect(); - let a = mask8x16::from_slice(simd, &a); - let b = mask8x16::from_slice(simd, &b); - let expected_0: Vec = (1_i8..=16).collect(); - let expected_8: Vec = (9_i8..=24).collect(); - let expected_16: Vec = (17_i8..=32).collect(); - assert_eq!(a.slide::<0>(b).as_slice(), &expected_0); - assert_eq!(a.slide::<8>(b).as_slice(), &expected_8); - assert_eq!(a.slide::<16>(b).as_slice(), &expected_16); -} - -#[simd_test] -fn slide_within_blocks_mask8x16(simd: S) { - let a: Vec = (1_i8..=16).collect(); - let b: Vec = (17_i8..=32).collect(); - let a = mask8x16::from_slice(simd, &a); - let b = mask8x16::from_slice(simd, &b); - assert_eq!(a.slide_within_blocks::<0>(b).as_slice(), a.as_slice()); - assert_eq!( - a.slide_within_blocks::<1>(b).as_slice(), - &[2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,] - ); - assert_eq!(a.slide_within_blocks::<16>(b).as_slice(), b.as_slice()); -} - -#[simd_test] -fn slide_mask8x32(simd: S) { - let a: Vec = (1_i8..=32).collect(); - let b: Vec = (33_i8..=64).collect(); - let a = mask8x32::from_slice(simd, &a); - let b = mask8x32::from_slice(simd, &b); - let expected_0: Vec = (1_i8..=32).collect(); - let expected_16: Vec = (17_i8..=48).collect(); - let expected_32: Vec = (33_i8..=64).collect(); - assert_eq!(a.slide::<0>(b).as_slice(), &expected_0); - assert_eq!(a.slide::<16>(b).as_slice(), &expected_16); - assert_eq!(a.slide::<32>(b).as_slice(), &expected_32); -} - -#[simd_test] -fn slide_within_blocks_mask8x32(simd: S) { - let a: Vec = (1_i8..=32).collect(); - let b: Vec = (33_i8..=64).collect(); - let a = mask8x32::from_slice(simd, &a); - let b = mask8x32::from_slice(simd, &b); - - assert_eq!(a.slide_within_blocks::<0>(b).as_slice(), a.as_slice()); - - let expected_1: [i8; 32] = [ - 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 33, 18, 19, 20, 21, 22, 23, 24, 25, 26, - 27, 28, 29, 30, 31, 32, 49, - ]; - assert_eq!(a.slide_within_blocks::<1>(b).as_slice(), &expected_1); - - assert_eq!(a.slide_within_blocks::<16>(b).as_slice(), b.as_slice()); -} - -#[simd_test] -fn slide_mask8x64(simd: S) { - let a: Vec = (0_i8..=63).collect(); - let b: Vec = (64_i8..=127).collect(); - let a = mask8x64::from_slice(simd, &a); - let b = mask8x64::from_slice(simd, &b); - let expected_0: Vec = (0_i8..=63).collect(); - let expected_32: Vec = (32_i8..=95).collect(); - let expected_64: Vec = (64_i8..=127).collect(); - assert_eq!(a.slide::<0>(b).as_slice(), &expected_0); - assert_eq!(a.slide::<32>(b).as_slice(), &expected_32); - assert_eq!(a.slide::<64>(b).as_slice(), &expected_64); -} - -#[simd_test] -fn slide_within_blocks_mask8x64(simd: S) { - let a: Vec = (0_i8..=63).collect(); - let b: Vec = (64_i8..=127).collect(); - let a = mask8x64::from_slice(simd, &a); - let b = mask8x64::from_slice(simd, &b); - - assert_eq!(a.slide_within_blocks::<0>(b).as_slice(), a.as_slice()); - - let expected_1: [i8; 64] = [ - 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 17, 18, 19, 20, 21, 22, 23, 24, 25, - 26, 27, 28, 29, 30, 31, 80, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 96, - 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, - ]; - assert_eq!(a.slide_within_blocks::<1>(b).as_slice(), &expected_1); - - assert_eq!(a.slide_within_blocks::<16>(b).as_slice(), b.as_slice()); -} - -#[simd_test] -fn slide_mask16x8(simd: S) { - let a: Vec = (1_i16..=8).collect(); - let b: Vec = (9_i16..=16).collect(); - let a = mask16x8::from_slice(simd, &a); - let b = mask16x8::from_slice(simd, &b); - let expected_0: Vec = (1_i16..=8).collect(); - let expected_4: Vec = (5_i16..=12).collect(); - let expected_8: Vec = (9_i16..=16).collect(); - assert_eq!(a.slide::<0>(b).as_slice(), &expected_0); - assert_eq!(a.slide::<4>(b).as_slice(), &expected_4); - assert_eq!(a.slide::<8>(b).as_slice(), &expected_8); -} - -#[simd_test] -fn slide_within_blocks_mask16x8(simd: S) { - let a: Vec = (1_i16..=8).collect(); - let b: Vec = (9_i16..=16).collect(); - let a = mask16x8::from_slice(simd, &a); - let b = mask16x8::from_slice(simd, &b); - assert_eq!(a.slide_within_blocks::<0>(b).as_slice(), a.as_slice()); - assert_eq!( - a.slide_within_blocks::<1>(b).as_slice(), - &[2, 3, 4, 5, 6, 7, 8, 9] - ); - assert_eq!(a.slide_within_blocks::<8>(b).as_slice(), b.as_slice()); -} - -#[simd_test] -fn slide_mask16x16(simd: S) { - let a: Vec = (1_i16..=16).collect(); - let b: Vec = (17_i16..=32).collect(); - let a = mask16x16::from_slice(simd, &a); - let b = mask16x16::from_slice(simd, &b); - let expected_0: Vec = (1_i16..=16).collect(); - let expected_8: Vec = (9_i16..=24).collect(); - let expected_16: Vec = (17_i16..=32).collect(); - assert_eq!(a.slide::<0>(b).as_slice(), &expected_0); - assert_eq!(a.slide::<8>(b).as_slice(), &expected_8); - assert_eq!(a.slide::<16>(b).as_slice(), &expected_16); -} - -#[simd_test] -fn slide_within_blocks_mask16x16(simd: S) { - let a: Vec = (1_i16..=16).collect(); - let b: Vec = (17_i16..=32).collect(); - let a = mask16x16::from_slice(simd, &a); - let b = mask16x16::from_slice(simd, &b); - - assert_eq!(a.slide_within_blocks::<0>(b).as_slice(), a.as_slice()); - - let expected_1: [i16; 16] = [2, 3, 4, 5, 6, 7, 8, 17, 10, 11, 12, 13, 14, 15, 16, 25]; - assert_eq!(a.slide_within_blocks::<1>(b).as_slice(), &expected_1); - - assert_eq!(a.slide_within_blocks::<8>(b).as_slice(), b.as_slice()); -} - -#[simd_test] -fn slide_mask16x32(simd: S) { - let a: Vec = (1_i16..=32).collect(); - let b: Vec = (33_i16..=64).collect(); - let a = mask16x32::from_slice(simd, &a); - let b = mask16x32::from_slice(simd, &b); - let expected_0: Vec = (1_i16..=32).collect(); - let expected_16: Vec = (17_i16..=48).collect(); - let expected_32: Vec = (33_i16..=64).collect(); - assert_eq!(a.slide::<0>(b).as_slice(), &expected_0); - assert_eq!(a.slide::<16>(b).as_slice(), &expected_16); - assert_eq!(a.slide::<32>(b).as_slice(), &expected_32); -} - -#[simd_test] -fn slide_within_blocks_mask16x32(simd: S) { - let a: Vec = (1_i16..=32).collect(); - let b: Vec = (33_i16..=64).collect(); - let a = mask16x32::from_slice(simd, &a); - let b = mask16x32::from_slice(simd, &b); - - assert_eq!(a.slide_within_blocks::<0>(b).as_slice(), a.as_slice()); - - let expected_1: [i16; 32] = [ - 2, 3, 4, 5, 6, 7, 8, 33, 10, 11, 12, 13, 14, 15, 16, 41, 18, 19, 20, 21, 22, 23, 24, 49, - 26, 27, 28, 29, 30, 31, 32, 57, - ]; - assert_eq!(a.slide_within_blocks::<1>(b).as_slice(), &expected_1); - - assert_eq!(a.slide_within_blocks::<8>(b).as_slice(), b.as_slice()); -} - -#[simd_test] -fn slide_mask32x4(simd: S) { - let a = mask32x4::from_slice(simd, &[1, 2, 3, 4]); - let b = mask32x4::from_slice(simd, &[5, 6, 7, 8]); - assert_eq!(*a.slide::<0>(b), [1, 2, 3, 4]); - assert_eq!(*a.slide::<2>(b), [3, 4, 5, 6]); - assert_eq!(*a.slide::<4>(b), [5, 6, 7, 8]); -} - -#[simd_test] -fn slide_within_blocks_mask32x4(simd: S) { - let a = mask32x4::from_slice(simd, &[1, 2, 3, 4]); - let b = mask32x4::from_slice(simd, &[5, 6, 7, 8]); - assert_eq!(a.slide_within_blocks::<0>(b).as_slice(), a.as_slice()); - assert_eq!(a.slide_within_blocks::<1>(b).as_slice(), &[2, 3, 4, 5]); - assert_eq!(a.slide_within_blocks::<4>(b).as_slice(), b.as_slice()); -} - -#[simd_test] -fn slide_mask32x8(simd: S) { - let a = mask32x8::from_slice(simd, &[1, 2, 3, 4, 5, 6, 7, 8]); - let b = mask32x8::from_slice(simd, &[9, 10, 11, 12, 13, 14, 15, 16]); - assert_eq!(*a.slide::<0>(b), [1, 2, 3, 4, 5, 6, 7, 8]); - assert_eq!(*a.slide::<4>(b), [5, 6, 7, 8, 9, 10, 11, 12]); - assert_eq!(*a.slide::<8>(b), [9, 10, 11, 12, 13, 14, 15, 16]); -} - -#[simd_test] -fn slide_within_blocks_mask32x8(simd: S) { - let a = mask32x8::from_slice(simd, &[1, 2, 3, 4, 5, 6, 7, 8]); - let b = mask32x8::from_slice(simd, &[9, 10, 11, 12, 13, 14, 15, 16]); - assert_eq!(a.slide_within_blocks::<0>(b).as_slice(), a.as_slice()); - assert_eq!( - a.slide_within_blocks::<1>(b).as_slice(), - &[2, 3, 4, 9, 6, 7, 8, 13] - ); - assert_eq!(a.slide_within_blocks::<4>(b).as_slice(), b.as_slice()); -} - -#[simd_test] -fn slide_mask32x16(simd: S) { - let a: Vec = (1..=16).collect(); - let b: Vec = (17..=32).collect(); - let a = mask32x16::from_slice(simd, &a); - let b = mask32x16::from_slice(simd, &b); - let expected_0: Vec = (1..=16).collect(); - let expected_8: Vec = (9..=24).collect(); - let expected_16: Vec = (17..=32).collect(); - assert_eq!(a.slide::<0>(b).as_slice(), &expected_0); - assert_eq!(a.slide::<8>(b).as_slice(), &expected_8); - assert_eq!(a.slide::<16>(b).as_slice(), &expected_16); -} - -#[simd_test] -fn slide_within_blocks_mask32x16(simd: S) { - let a: Vec = (1..=16).collect(); - let b: Vec = (17..=32).collect(); - let a = mask32x16::from_slice(simd, &a); - let b = mask32x16::from_slice(simd, &b); - - assert_eq!(a.slide_within_blocks::<0>(b).as_slice(), a.as_slice()); - - let expected_1: [i32; 16] = [2, 3, 4, 17, 6, 7, 8, 21, 10, 11, 12, 25, 14, 15, 16, 29]; - assert_eq!(a.slide_within_blocks::<1>(b).as_slice(), &expected_1); - - assert_eq!(a.slide_within_blocks::<4>(b).as_slice(), b.as_slice()); -} - -#[simd_test] -fn slide_mask64x2(simd: S) { - let a = mask64x2::from_slice(simd, &[1, 2]); - let b = mask64x2::from_slice(simd, &[3, 4]); - assert_eq!(*a.slide::<0>(b), [1, 2]); - assert_eq!(*a.slide::<1>(b), [2, 3]); - assert_eq!(*a.slide::<2>(b), [3, 4]); -} - -#[simd_test] -fn slide_within_blocks_mask64x2(simd: S) { - let a = mask64x2::from_slice(simd, &[1, 2]); - let b = mask64x2::from_slice(simd, &[3, 4]); - assert_eq!(a.slide_within_blocks::<0>(b).as_slice(), a.as_slice()); - assert_eq!(a.slide_within_blocks::<1>(b).as_slice(), &[2, 3]); - assert_eq!(a.slide_within_blocks::<2>(b).as_slice(), b.as_slice()); -} - -#[simd_test] -fn slide_mask64x4(simd: S) { - let a = mask64x4::from_slice(simd, &[1, 2, 3, 4]); - let b = mask64x4::from_slice(simd, &[5, 6, 7, 8]); - assert_eq!(*a.slide::<0>(b), [1, 2, 3, 4]); - assert_eq!(*a.slide::<2>(b), [3, 4, 5, 6]); - assert_eq!(*a.slide::<4>(b), [5, 6, 7, 8]); -} - -#[simd_test] -fn slide_within_blocks_mask64x4(simd: S) { - // 256-bit vector partitioned into two 128-bit blocks (2 i64 per block) - let a = mask64x4::from_slice(simd, &[1, 2, 3, 4]); - let b = mask64x4::from_slice(simd, &[5, 6, 7, 8]); - assert_eq!(a.slide_within_blocks::<0>(b).as_slice(), a.as_slice()); - assert_eq!(a.slide_within_blocks::<1>(b).as_slice(), &[2, 5, 4, 7]); - assert_eq!(a.slide_within_blocks::<2>(b).as_slice(), b.as_slice()); -} - -#[simd_test] -fn slide_mask64x8(simd: S) { - let a: Vec = (1..=8).collect(); - let b: Vec = (9..=16).collect(); - let a = mask64x8::from_slice(simd, &a); - let b = mask64x8::from_slice(simd, &b); - let expected_0: Vec = (1..=8).collect(); - let expected_4: Vec = (5..=12).collect(); - let expected_8: Vec = (9..=16).collect(); - assert_eq!(a.slide::<0>(b).as_slice(), &expected_0); - assert_eq!(a.slide::<4>(b).as_slice(), &expected_4); - assert_eq!(a.slide::<8>(b).as_slice(), &expected_8); -} - -#[simd_test] -fn slide_within_blocks_mask64x8(simd: S) { - let a: Vec = (1..=8).collect(); - let b: Vec = (9..=16).collect(); - let a = mask64x8::from_slice(simd, &a); - let b = mask64x8::from_slice(simd, &b); - - assert_eq!(a.slide_within_blocks::<0>(b).as_slice(), a.as_slice()); - assert_eq!( - a.slide_within_blocks::<1>(b).as_slice(), - &[2, 9, 4, 11, 6, 13, 8, 15] - ); - assert_eq!(a.slide_within_blocks::<2>(b).as_slice(), b.as_slice()); -} - // Because the slide amount is a const generic, the exhaustive tests have to *compile* one slide per amount per vector // type. Disable them entirely.` #[cfg(false)]