diff --git a/fearless_simd/Cargo.toml b/fearless_simd/Cargo.toml index c9cf895a..c0482717 100644 --- a/fearless_simd/Cargo.toml +++ b/fearless_simd/Cargo.toml @@ -33,10 +33,6 @@ std = [] # Use floating point implementations from libm libm = ["dep:libm"] -# Include safe wrappers for (some) target feature specific intrinsics, -# beyond the basic SIMD operations abstracted on all platforms -safe_wrappers = [] - # Force the "fallback" SIMD level to be supported # This is primarily used for tests force_support_fallback = [] diff --git a/fearless_simd/README.md b/fearless_simd/README.md index 883be8be..f9d8c227 100644 --- a/fearless_simd/README.md +++ b/fearless_simd/README.md @@ -151,8 +151,6 @@ The following crate [feature flags](https://doc.rust-lang.org/cargo/reference/fe - `std` (enabled by default): Get floating point functions from the standard library (likely using your target's libc). Also allows using [`Level::new`] on all platforms, to detect which target features are enabled. - `libm`: Use floating point implementations from [libm]. -- `safe_wrappers`: Include safe wrappers for (some) target feature specific intrinsics, - beyond the basic SIMD operations abstracted on all platforms. - `force_support_fallback`: Force scalar fallback, to be supported, even if your compilation target has a better baseline. At least one of `std` and `libm` is required; `std` overrides `libm`. diff --git a/fearless_simd/src/core_arch/aarch64/mod.rs b/fearless_simd/src/core_arch/aarch64/mod.rs deleted file mode 100644 index 86f5f0d7..00000000 --- a/fearless_simd/src/core_arch/aarch64/mod.rs +++ /dev/null @@ -1,4070 +0,0 @@ -// Copyright 2024 the Fearless_SIMD Authors -// SPDX-License-Identifier: Apache-2.0 OR MIT - -//! Access to intrinsics on aarch64. - -// These implementations are cut and pasted from pulp. - -/// A token for Neon intrinsics on aarch64. -#[derive(Clone, Copy, Debug)] -pub struct Neon { - _private: (), -} - -impl Neon { - /// Create a SIMD token. 
- /// - /// # Safety - /// - /// The required CPU features must be available. - #[inline] - pub const unsafe fn new_unchecked() -> Self { - Self { _private: () } - } -} - -#[cfg(feature = "safe_wrappers")] -use {crate::impl_macros::delegate, core::arch::aarch64::*}; - -#[cfg(feature = "safe_wrappers")] -type p8 = u8; -#[cfg(feature = "safe_wrappers")] -type p16 = u16; -#[cfg(feature = "safe_wrappers")] -type p64 = u64; -#[cfg(feature = "safe_wrappers")] -type p128 = u128; - -#[cfg(feature = "safe_wrappers")] -#[expect( - clippy::missing_safety_doc, - reason = "TODO: https://github.com/linebender/fearless_simd/issues/40" -)] -impl Neon { - delegate! { core::arch::aarch64: - fn vand_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; - fn vandq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; - fn vand_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - fn vandq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; - fn vand_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; - fn vandq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; - fn vand_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; - fn vandq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; - fn vand_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; - fn vandq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; - fn vand_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; - fn vandq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; - fn vand_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t; - fn vandq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t; - fn vand_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t; - fn vandq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t; - fn vorr_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; - fn vorrq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; - fn vorr_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - fn vorrq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; - fn vorr_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; - fn vorrq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; - fn vorr_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; - fn 
vorrq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; - fn vorr_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; - fn vorrq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; - fn vorr_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; - fn vorrq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; - fn vorr_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t; - fn vorrq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t; - fn vorr_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t; - fn vorrq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t; - fn veor_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; - fn veorq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; - fn veor_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - fn veorq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; - fn veor_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; - fn veorq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; - fn veor_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; - fn veorq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; - fn veor_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; - fn veorq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; - fn veor_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; - fn veorq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; - fn veor_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t; - fn veorq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t; - fn veor_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t; - fn veorq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t; - fn vabd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; - fn vabdq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; - fn vabd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - fn vabdq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; - fn vabd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; - fn vabdq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; - fn vabd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; - fn vabdq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; - fn vabd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; - fn vabdq_u16(a: uint16x8_t, b: uint16x8_t) -> 
uint16x8_t; - fn vabd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; - fn vabdq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; - fn vabd_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; - fn vabdq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; - fn vabdl_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t; - fn vabdl_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t; - fn vabdl_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t; - fn vabdl_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t; - fn vabdl_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t; - fn vabdl_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t; - fn vceq_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; - fn vceqq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; - fn vceq_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; - fn vceqq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; - fn vceq_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; - fn vceqq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; - fn vceq_s8(a: int8x8_t, b: int8x8_t) -> uint8x8_t; - fn vceqq_s8(a: int8x16_t, b: int8x16_t) -> uint8x16_t; - fn vceq_s16(a: int16x4_t, b: int16x4_t) -> uint16x4_t; - fn vceqq_s16(a: int16x8_t, b: int16x8_t) -> uint16x8_t; - fn vceq_s32(a: int32x2_t, b: int32x2_t) -> uint32x2_t; - fn vceqq_s32(a: int32x4_t, b: int32x4_t) -> uint32x4_t; - fn vceq_p8(a: poly8x8_t, b: poly8x8_t) -> uint8x8_t; - fn vceqq_p8(a: poly8x16_t, b: poly8x16_t) -> uint8x16_t; - fn vceq_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t; - fn vceqq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t; - fn vtst_s8(a: int8x8_t, b: int8x8_t) -> uint8x8_t; - fn vtstq_s8(a: int8x16_t, b: int8x16_t) -> uint8x16_t; - fn vtst_s16(a: int16x4_t, b: int16x4_t) -> uint16x4_t; - fn vtstq_s16(a: int16x8_t, b: int16x8_t) -> uint16x8_t; - fn vtst_s32(a: int32x2_t, b: int32x2_t) -> uint32x2_t; - fn vtstq_s32(a: int32x4_t, b: int32x4_t) -> uint32x4_t; - fn vtst_p8(a: poly8x8_t, b: poly8x8_t) -> uint8x8_t; - fn vtstq_p8(a: poly8x16_t, b: poly8x16_t) -> uint8x16_t; - fn 
vtst_p16(a: poly16x4_t, b: poly16x4_t) -> uint16x4_t; - fn vtstq_p16(a: poly16x8_t, b: poly16x8_t) -> uint16x8_t; - fn vtst_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; - fn vtstq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; - fn vtst_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; - fn vtstq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; - fn vtst_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; - fn vtstq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; - fn vabs_f32(a: float32x2_t) -> float32x2_t; - fn vabsq_f32(a: float32x4_t) -> float32x4_t; - fn vcgt_s8(a: int8x8_t, b: int8x8_t) -> uint8x8_t; - fn vcgtq_s8(a: int8x16_t, b: int8x16_t) -> uint8x16_t; - fn vcgt_s16(a: int16x4_t, b: int16x4_t) -> uint16x4_t; - fn vcgtq_s16(a: int16x8_t, b: int16x8_t) -> uint16x8_t; - fn vcgt_s32(a: int32x2_t, b: int32x2_t) -> uint32x2_t; - fn vcgtq_s32(a: int32x4_t, b: int32x4_t) -> uint32x4_t; - fn vcgt_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; - fn vcgtq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; - fn vcgt_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; - fn vcgtq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; - fn vcgt_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; - fn vcgtq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; - fn vcgt_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t; - fn vcgtq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t; - fn vclt_s8(a: int8x8_t, b: int8x8_t) -> uint8x8_t; - fn vcltq_s8(a: int8x16_t, b: int8x16_t) -> uint8x16_t; - fn vclt_s16(a: int16x4_t, b: int16x4_t) -> uint16x4_t; - fn vcltq_s16(a: int16x8_t, b: int16x8_t) -> uint16x8_t; - fn vclt_s32(a: int32x2_t, b: int32x2_t) -> uint32x2_t; - fn vcltq_s32(a: int32x4_t, b: int32x4_t) -> uint32x4_t; - fn vclt_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; - fn vcltq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; - fn vclt_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; - fn vcltq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; - fn vclt_u32(a: uint32x2_t, b: uint32x2_t) -> 
uint32x2_t; - fn vcltq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; - fn vclt_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t; - fn vcltq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t; - fn vcle_s8(a: int8x8_t, b: int8x8_t) -> uint8x8_t; - fn vcleq_s8(a: int8x16_t, b: int8x16_t) -> uint8x16_t; - fn vcle_s16(a: int16x4_t, b: int16x4_t) -> uint16x4_t; - fn vcleq_s16(a: int16x8_t, b: int16x8_t) -> uint16x8_t; - fn vcle_s32(a: int32x2_t, b: int32x2_t) -> uint32x2_t; - fn vcleq_s32(a: int32x4_t, b: int32x4_t) -> uint32x4_t; - fn vcle_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; - fn vcleq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; - fn vcle_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; - fn vcleq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; - fn vcle_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; - fn vcleq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; - fn vcle_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t; - fn vcleq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t; - fn vcge_s8(a: int8x8_t, b: int8x8_t) -> uint8x8_t; - fn vcgeq_s8(a: int8x16_t, b: int8x16_t) -> uint8x16_t; - fn vcge_s16(a: int16x4_t, b: int16x4_t) -> uint16x4_t; - fn vcgeq_s16(a: int16x8_t, b: int16x8_t) -> uint16x8_t; - fn vcge_s32(a: int32x2_t, b: int32x2_t) -> uint32x2_t; - fn vcgeq_s32(a: int32x4_t, b: int32x4_t) -> uint32x4_t; - fn vcge_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; - fn vcgeq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; - fn vcge_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; - fn vcgeq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; - fn vcge_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; - fn vcgeq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; - fn vcge_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t; - fn vcgeq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t; - fn vcls_s8(a: int8x8_t) -> int8x8_t; - fn vclsq_s8(a: int8x16_t) -> int8x16_t; - fn vcls_s16(a: int16x4_t) -> int16x4_t; - fn vclsq_s16(a: int16x8_t) -> int16x8_t; - fn 
vcls_s32(a: int32x2_t) -> int32x2_t; - fn vclsq_s32(a: int32x4_t) -> int32x4_t; - fn vcls_u8(a: uint8x8_t) -> int8x8_t; - fn vclsq_u8(a: uint8x16_t) -> int8x16_t; - fn vcls_u16(a: uint16x4_t) -> int16x4_t; - fn vclsq_u16(a: uint16x8_t) -> int16x8_t; - fn vcls_u32(a: uint32x2_t) -> int32x2_t; - fn vclsq_u32(a: uint32x4_t) -> int32x4_t; - fn vclz_s8(a: int8x8_t) -> int8x8_t; - fn vclzq_s8(a: int8x16_t) -> int8x16_t; - fn vclz_s16(a: int16x4_t) -> int16x4_t; - fn vclzq_s16(a: int16x8_t) -> int16x8_t; - fn vclz_s32(a: int32x2_t) -> int32x2_t; - fn vclzq_s32(a: int32x4_t) -> int32x4_t; - fn vclz_u8(a: uint8x8_t) -> uint8x8_t; - fn vclzq_u8(a: uint8x16_t) -> uint8x16_t; - fn vclz_u16(a: uint16x4_t) -> uint16x4_t; - fn vclzq_u16(a: uint16x8_t) -> uint16x8_t; - fn vclz_u32(a: uint32x2_t) -> uint32x2_t; - fn vclzq_u32(a: uint32x4_t) -> uint32x4_t; - fn vcagt_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t; - fn vcagtq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t; - fn vcage_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t; - fn vcageq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t; - fn vcalt_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t; - fn vcaltq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t; - fn vcale_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t; - fn vcaleq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t; - fn vcreate_s8(a: u64) -> int8x8_t; - fn vcreate_s16(a: u64) -> int16x4_t; - fn vcreate_s32(a: u64) -> int32x2_t; - fn vcreate_s64(a: u64) -> int64x1_t; - fn vcreate_u8(a: u64) -> uint8x8_t; - fn vcreate_u16(a: u64) -> uint16x4_t; - fn vcreate_u32(a: u64) -> uint32x2_t; - fn vcreate_u64(a: u64) -> uint64x1_t; - fn vcreate_p8(a: u64) -> poly8x8_t; - fn vcreate_p16(a: u64) -> poly16x4_t; - fn vcreate_f32(a: u64) -> float32x2_t; - fn vcvt_f32_s32(a: int32x2_t) -> float32x2_t; - fn vcvtq_f32_s32(a: int32x4_t) -> float32x4_t; - fn vcvt_f32_u32(a: uint32x2_t) -> float32x2_t; - fn vcvtq_f32_u32(a: uint32x4_t) -> float32x4_t; - fn 
vcvt_n_f32_s32(a: int32x2_t) -> float32x2_t; - fn vcvtq_n_f32_s32(a: int32x4_t) -> float32x4_t; - fn vcvt_n_f32_u32(a: uint32x2_t) -> float32x2_t; - fn vcvtq_n_f32_u32(a: uint32x4_t) -> float32x4_t; - fn vcvt_n_s32_f32(a: float32x2_t) -> int32x2_t; - fn vcvtq_n_s32_f32(a: float32x4_t) -> int32x4_t; - fn vcvt_n_u32_f32(a: float32x2_t) -> uint32x2_t; - fn vcvtq_n_u32_f32(a: float32x4_t) -> uint32x4_t; - fn vcvt_s32_f32(a: float32x2_t) -> int32x2_t; - fn vcvtq_s32_f32(a: float32x4_t) -> int32x4_t; - fn vcvt_u32_f32(a: float32x2_t) -> uint32x2_t; - fn vcvtq_u32_f32(a: float32x4_t) -> uint32x4_t; - fn vdup_lane_s8(a: int8x8_t) -> int8x8_t; - fn vdupq_laneq_s8(a: int8x16_t) -> int8x16_t; - fn vdup_lane_s16(a: int16x4_t) -> int16x4_t; - fn vdupq_laneq_s16(a: int16x8_t) -> int16x8_t; - fn vdup_lane_s32(a: int32x2_t) -> int32x2_t; - fn vdupq_laneq_s32(a: int32x4_t) -> int32x4_t; - fn vdup_laneq_s8(a: int8x16_t) -> int8x8_t; - fn vdup_laneq_s16(a: int16x8_t) -> int16x4_t; - fn vdup_laneq_s32(a: int32x4_t) -> int32x2_t; - fn vdupq_lane_s8(a: int8x8_t) -> int8x16_t; - fn vdupq_lane_s16(a: int16x4_t) -> int16x8_t; - fn vdupq_lane_s32(a: int32x2_t) -> int32x4_t; - fn vdup_lane_u8(a: uint8x8_t) -> uint8x8_t; - fn vdupq_laneq_u8(a: uint8x16_t) -> uint8x16_t; - fn vdup_lane_u16(a: uint16x4_t) -> uint16x4_t; - fn vdupq_laneq_u16(a: uint16x8_t) -> uint16x8_t; - fn vdup_lane_u32(a: uint32x2_t) -> uint32x2_t; - fn vdupq_laneq_u32(a: uint32x4_t) -> uint32x4_t; - fn vdup_laneq_u8(a: uint8x16_t) -> uint8x8_t; - fn vdup_laneq_u16(a: uint16x8_t) -> uint16x4_t; - fn vdup_laneq_u32(a: uint32x4_t) -> uint32x2_t; - fn vdupq_lane_u8(a: uint8x8_t) -> uint8x16_t; - fn vdupq_lane_u16(a: uint16x4_t) -> uint16x8_t; - fn vdupq_lane_u32(a: uint32x2_t) -> uint32x4_t; - fn vdup_lane_p8(a: poly8x8_t) -> poly8x8_t; - fn vdupq_laneq_p8(a: poly8x16_t) -> poly8x16_t; - fn vdup_lane_p16(a: poly16x4_t) -> poly16x4_t; - fn vdupq_laneq_p16(a: poly16x8_t) -> poly16x8_t; - fn vdup_laneq_p8(a: poly8x16_t) -> 
poly8x8_t; - fn vdup_laneq_p16(a: poly16x8_t) -> poly16x4_t; - fn vdupq_lane_p8(a: poly8x8_t) -> poly8x16_t; - fn vdupq_lane_p16(a: poly16x4_t) -> poly16x8_t; - fn vdupq_laneq_s64(a: int64x2_t) -> int64x2_t; - fn vdupq_lane_s64(a: int64x1_t) -> int64x2_t; - fn vdupq_laneq_u64(a: uint64x2_t) -> uint64x2_t; - fn vdupq_lane_u64(a: uint64x1_t) -> uint64x2_t; - fn vdup_lane_f32(a: float32x2_t) -> float32x2_t; - fn vdupq_laneq_f32(a: float32x4_t) -> float32x4_t; - fn vdup_laneq_f32(a: float32x4_t) -> float32x2_t; - fn vdupq_lane_f32(a: float32x2_t) -> float32x4_t; - fn vdup_lane_s64(a: int64x1_t) -> int64x1_t; - fn vdup_lane_u64(a: uint64x1_t) -> uint64x1_t; - fn vdup_laneq_s64(a: int64x2_t) -> int64x1_t; - fn vdup_laneq_u64(a: uint64x2_t) -> uint64x1_t; - fn vext_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; - fn vextq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; - fn vext_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - fn vextq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; - fn vext_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; - fn vextq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; - fn vext_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; - fn vextq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; - fn vext_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; - fn vextq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; - fn vext_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; - fn vextq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; - fn vext_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t; - fn vextq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t; - fn vext_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t; - fn vextq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t; - fn vextq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t; - fn vextq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t; - fn vext_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; - fn vextq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; - fn vmla_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t; - fn 
vmlaq_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t; - fn vmla_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t; - fn vmlaq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t; - fn vmla_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t; - fn vmlaq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t; - fn vmla_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t; - fn vmlaq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t; - fn vmla_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t; - fn vmlaq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t; - fn vmla_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t; - fn vmlaq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t; - fn vmla_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t; - fn vmlaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t; - fn vmla_n_s16(a: int16x4_t, b: int16x4_t, c: i16) -> int16x4_t; - fn vmlaq_n_s16(a: int16x8_t, b: int16x8_t, c: i16) -> int16x8_t; - fn vmla_n_s32(a: int32x2_t, b: int32x2_t, c: i32) -> int32x2_t; - fn vmlaq_n_s32(a: int32x4_t, b: int32x4_t, c: i32) -> int32x4_t; - fn vmla_n_u16(a: uint16x4_t, b: uint16x4_t, c: u16) -> uint16x4_t; - fn vmlaq_n_u16(a: uint16x8_t, b: uint16x8_t, c: u16) -> uint16x8_t; - fn vmla_n_u32(a: uint32x2_t, b: uint32x2_t, c: u32) -> uint32x2_t; - fn vmlaq_n_u32(a: uint32x4_t, b: uint32x4_t, c: u32) -> uint32x4_t; - fn vmla_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t; - fn vmlaq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t; - fn vmla_lane_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t; - fn vmla_laneq_s16(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t; - fn vmlaq_lane_s16(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t; - fn vmlaq_laneq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t; - fn vmla_lane_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) 
-> int32x2_t; - fn vmla_laneq_s32(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t; - fn vmlaq_lane_s32(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t; - fn vmlaq_laneq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t; - fn vmla_lane_u16( - a: uint16x4_t, - b: uint16x4_t, - c: uint16x4_t, - ) -> uint16x4_t; - fn vmla_laneq_u16( - a: uint16x4_t, - b: uint16x4_t, - c: uint16x8_t, - ) -> uint16x4_t; - fn vmlaq_lane_u16( - a: uint16x8_t, - b: uint16x8_t, - c: uint16x4_t, - ) -> uint16x8_t; - fn vmlaq_laneq_u16( - a: uint16x8_t, - b: uint16x8_t, - c: uint16x8_t, - ) -> uint16x8_t; - fn vmla_lane_u32( - a: uint32x2_t, - b: uint32x2_t, - c: uint32x2_t, - ) -> uint32x2_t; - fn vmla_laneq_u32( - a: uint32x2_t, - b: uint32x2_t, - c: uint32x4_t, - ) -> uint32x2_t; - fn vmlaq_lane_u32( - a: uint32x4_t, - b: uint32x4_t, - c: uint32x2_t, - ) -> uint32x4_t; - fn vmlaq_laneq_u32( - a: uint32x4_t, - b: uint32x4_t, - c: uint32x4_t, - ) -> uint32x4_t; - fn vmla_lane_f32( - a: float32x2_t, - b: float32x2_t, - c: float32x2_t, - ) -> float32x2_t; - fn vmla_laneq_f32( - a: float32x2_t, - b: float32x2_t, - c: float32x4_t, - ) -> float32x2_t; - fn vmlaq_lane_f32( - a: float32x4_t, - b: float32x4_t, - c: float32x2_t, - ) -> float32x4_t; - fn vmlaq_laneq_f32( - a: float32x4_t, - b: float32x4_t, - c: float32x4_t, - ) -> float32x4_t; - fn vmlal_s8(a: int16x8_t, b: int8x8_t, c: int8x8_t) -> int16x8_t; - fn vmlal_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t; - fn vmlal_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t; - fn vmlal_u8(a: uint16x8_t, b: uint8x8_t, c: uint8x8_t) -> uint16x8_t; - fn vmlal_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t; - fn vmlal_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t; - fn vmlal_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t; - fn vmlal_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t; - fn vmlal_n_u16(a: uint32x4_t, b: uint16x4_t, c: u16) -> uint32x4_t; - fn 
vmlal_n_u32(a: uint64x2_t, b: uint32x2_t, c: u32) -> uint64x2_t; - fn vmlal_lane_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t; - fn vmlal_laneq_s16(a: int32x4_t, b: int16x4_t, c: int16x8_t) -> int32x4_t; - fn vmlal_lane_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t; - fn vmlal_laneq_s32(a: int64x2_t, b: int32x2_t, c: int32x4_t) -> int64x2_t; - fn vmlal_lane_u16( - a: uint32x4_t, - b: uint16x4_t, - c: uint16x4_t, - ) -> uint32x4_t; - fn vmlal_laneq_u16( - a: uint32x4_t, - b: uint16x4_t, - c: uint16x8_t, - ) -> uint32x4_t; - fn vmlal_lane_u32( - a: uint64x2_t, - b: uint32x2_t, - c: uint32x2_t, - ) -> uint64x2_t; - fn vmlal_laneq_u32( - a: uint64x2_t, - b: uint32x2_t, - c: uint32x4_t, - ) -> uint64x2_t; - fn vmls_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t; - fn vmlsq_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t; - fn vmls_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t; - fn vmlsq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t; - fn vmls_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t; - fn vmlsq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t; - fn vmls_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t; - fn vmlsq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t; - fn vmls_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t; - fn vmlsq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t; - fn vmls_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t; - fn vmlsq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t; - fn vmls_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t; - fn vmlsq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t; - fn vmls_n_s16(a: int16x4_t, b: int16x4_t, c: i16) -> int16x4_t; - fn vmlsq_n_s16(a: int16x8_t, b: int16x8_t, c: i16) -> int16x8_t; - fn vmls_n_s32(a: int32x2_t, b: int32x2_t, c: i32) -> int32x2_t; - fn vmlsq_n_s32(a: int32x4_t, b: int32x4_t, c: 
i32) -> int32x4_t; - fn vmls_n_u16(a: uint16x4_t, b: uint16x4_t, c: u16) -> uint16x4_t; - fn vmlsq_n_u16(a: uint16x8_t, b: uint16x8_t, c: u16) -> uint16x8_t; - fn vmls_n_u32(a: uint32x2_t, b: uint32x2_t, c: u32) -> uint32x2_t; - fn vmlsq_n_u32(a: uint32x4_t, b: uint32x4_t, c: u32) -> uint32x4_t; - fn vmls_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t; - fn vmlsq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t; - fn vmls_lane_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t; - fn vmls_laneq_s16(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t; - fn vmlsq_lane_s16(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t; - fn vmlsq_laneq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t; - fn vmls_lane_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t; - fn vmls_laneq_s32(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t; - fn vmlsq_lane_s32(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t; - fn vmlsq_laneq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t; - fn vmls_lane_u16( - a: uint16x4_t, - b: uint16x4_t, - c: uint16x4_t, - ) -> uint16x4_t; - fn vmls_laneq_u16( - a: uint16x4_t, - b: uint16x4_t, - c: uint16x8_t, - ) -> uint16x4_t; - fn vmlsq_lane_u16( - a: uint16x8_t, - b: uint16x8_t, - c: uint16x4_t, - ) -> uint16x8_t; - fn vmlsq_laneq_u16( - a: uint16x8_t, - b: uint16x8_t, - c: uint16x8_t, - ) -> uint16x8_t; - fn vmls_lane_u32( - a: uint32x2_t, - b: uint32x2_t, - c: uint32x2_t, - ) -> uint32x2_t; - fn vmls_laneq_u32( - a: uint32x2_t, - b: uint32x2_t, - c: uint32x4_t, - ) -> uint32x2_t; - fn vmlsq_lane_u32( - a: uint32x4_t, - b: uint32x4_t, - c: uint32x2_t, - ) -> uint32x4_t; - fn vmlsq_laneq_u32( - a: uint32x4_t, - b: uint32x4_t, - c: uint32x4_t, - ) -> uint32x4_t; - fn vmls_lane_f32( - a: float32x2_t, - b: float32x2_t, - c: float32x2_t, - ) -> float32x2_t; - fn vmls_laneq_f32( - a: float32x2_t, - b: float32x2_t, - c: float32x4_t, - ) -> float32x2_t; - fn vmlsq_lane_f32( - a: 
float32x4_t, - b: float32x4_t, - c: float32x2_t, - ) -> float32x4_t; - fn vmlsq_laneq_f32( - a: float32x4_t, - b: float32x4_t, - c: float32x4_t, - ) -> float32x4_t; - fn vmlsl_s8(a: int16x8_t, b: int8x8_t, c: int8x8_t) -> int16x8_t; - fn vmlsl_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t; - fn vmlsl_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t; - fn vmlsl_u8(a: uint16x8_t, b: uint8x8_t, c: uint8x8_t) -> uint16x8_t; - fn vmlsl_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t; - fn vmlsl_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t; - fn vmlsl_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t; - fn vmlsl_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t; - fn vmlsl_n_u16(a: uint32x4_t, b: uint16x4_t, c: u16) -> uint32x4_t; - fn vmlsl_n_u32(a: uint64x2_t, b: uint32x2_t, c: u32) -> uint64x2_t; - fn vmlsl_lane_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t; - fn vmlsl_laneq_s16(a: int32x4_t, b: int16x4_t, c: int16x8_t) -> int32x4_t; - fn vmlsl_lane_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t; - fn vmlsl_laneq_s32(a: int64x2_t, b: int32x2_t, c: int32x4_t) -> int64x2_t; - fn vmlsl_lane_u16( - a: uint32x4_t, - b: uint16x4_t, - c: uint16x4_t, - ) -> uint32x4_t; - fn vmlsl_laneq_u16( - a: uint32x4_t, - b: uint16x4_t, - c: uint16x8_t, - ) -> uint32x4_t; - fn vmlsl_lane_u32( - a: uint64x2_t, - b: uint32x2_t, - c: uint32x2_t, - ) -> uint64x2_t; - fn vmlsl_laneq_u32( - a: uint64x2_t, - b: uint32x2_t, - c: uint32x4_t, - ) -> uint64x2_t; - fn vneg_s8(a: int8x8_t) -> int8x8_t; - fn vnegq_s8(a: int8x16_t) -> int8x16_t; - fn vneg_s16(a: int16x4_t) -> int16x4_t; - fn vnegq_s16(a: int16x8_t) -> int16x8_t; - fn vneg_s32(a: int32x2_t) -> int32x2_t; - fn vnegq_s32(a: int32x4_t) -> int32x4_t; - fn vneg_f32(a: float32x2_t) -> float32x2_t; - fn vnegq_f32(a: float32x4_t) -> float32x4_t; - fn vqneg_s8(a: int8x8_t) -> int8x8_t; - fn vqnegq_s8(a: int8x16_t) -> int8x16_t; - fn vqneg_s16(a: int16x4_t) 
-> int16x4_t; - fn vqnegq_s16(a: int16x8_t) -> int16x8_t; - fn vqneg_s32(a: int32x2_t) -> int32x2_t; - fn vqnegq_s32(a: int32x4_t) -> int32x4_t; - fn vqsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; - fn vqsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; - fn vqsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; - fn vqsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; - fn vqsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; - fn vqsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; - fn vqsub_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t; - fn vqsubq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t; - fn vqsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; - fn vqsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; - fn vqsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - fn vqsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; - fn vqsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; - fn vqsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; - fn vqsub_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t; - fn vqsubq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t; - fn vhadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; - fn vhaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; - fn vhadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; - fn vhaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; - fn vhadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; - fn vhaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; - fn vhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; - fn vhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; - fn vhadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - fn vhaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; - fn vhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; - fn vhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; - fn vrhadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; - fn vrhaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; - fn vrhadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; - fn vrhaddq_u16(a: uint16x8_t, b: 
uint16x8_t) -> uint16x8_t; - fn vrhadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; - fn vrhaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; - fn vrhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; - fn vrhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; - fn vrhadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - fn vrhaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; - fn vrhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; - fn vrhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; - fn vrndn_f32(a: float32x2_t) -> float32x2_t; - fn vrndnq_f32(a: float32x4_t) -> float32x4_t; - fn vqadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; - fn vqaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; - fn vqadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; - fn vqaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; - fn vqadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; - fn vqaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; - fn vqadd_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t; - fn vqaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t; - fn vqadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; - fn vqaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; - fn vqadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - fn vqaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; - fn vqadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; - fn vqaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; - fn vqadd_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t; - fn vqaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t; - unsafe fn vld1_s8_x2(a: *const i8) -> int8x8x2_t; - unsafe fn vld1_s16_x2(a: *const i16) -> int16x4x2_t; - unsafe fn vld1_s32_x2(a: *const i32) -> int32x2x2_t; - unsafe fn vld1_s64_x2(a: *const i64) -> int64x1x2_t; - unsafe fn vld1q_s8_x2(a: *const i8) -> int8x16x2_t; - unsafe fn vld1q_s16_x2(a: *const i16) -> int16x8x2_t; - unsafe fn vld1q_s32_x2(a: *const i32) -> int32x4x2_t; - unsafe fn vld1q_s64_x2(a: *const i64) -> int64x2x2_t; - unsafe fn vld1_s8_x3(a: *const 
i8) -> int8x8x3_t; - unsafe fn vld1_s16_x3(a: *const i16) -> int16x4x3_t; - unsafe fn vld1_s32_x3(a: *const i32) -> int32x2x3_t; - unsafe fn vld1_s64_x3(a: *const i64) -> int64x1x3_t; - unsafe fn vld1q_s8_x3(a: *const i8) -> int8x16x3_t; - unsafe fn vld1q_s16_x3(a: *const i16) -> int16x8x3_t; - unsafe fn vld1q_s32_x3(a: *const i32) -> int32x4x3_t; - unsafe fn vld1q_s64_x3(a: *const i64) -> int64x2x3_t; - unsafe fn vld1_s8_x4(a: *const i8) -> int8x8x4_t; - unsafe fn vld1_s16_x4(a: *const i16) -> int16x4x4_t; - unsafe fn vld1_s32_x4(a: *const i32) -> int32x2x4_t; - unsafe fn vld1_s64_x4(a: *const i64) -> int64x1x4_t; - unsafe fn vld1q_s8_x4(a: *const i8) -> int8x16x4_t; - unsafe fn vld1q_s16_x4(a: *const i16) -> int16x8x4_t; - unsafe fn vld1q_s32_x4(a: *const i32) -> int32x4x4_t; - unsafe fn vld1q_s64_x4(a: *const i64) -> int64x2x4_t; - unsafe fn vld1_u8_x2(a: *const u8) -> uint8x8x2_t; - unsafe fn vld1_u16_x2(a: *const u16) -> uint16x4x2_t; - unsafe fn vld1_u32_x2(a: *const u32) -> uint32x2x2_t; - unsafe fn vld1_u64_x2(a: *const u64) -> uint64x1x2_t; - unsafe fn vld1q_u8_x2(a: *const u8) -> uint8x16x2_t; - unsafe fn vld1q_u16_x2(a: *const u16) -> uint16x8x2_t; - unsafe fn vld1q_u32_x2(a: *const u32) -> uint32x4x2_t; - unsafe fn vld1q_u64_x2(a: *const u64) -> uint64x2x2_t; - unsafe fn vld1_u8_x3(a: *const u8) -> uint8x8x3_t; - unsafe fn vld1_u16_x3(a: *const u16) -> uint16x4x3_t; - unsafe fn vld1_u32_x3(a: *const u32) -> uint32x2x3_t; - unsafe fn vld1_u64_x3(a: *const u64) -> uint64x1x3_t; - unsafe fn vld1q_u8_x3(a: *const u8) -> uint8x16x3_t; - unsafe fn vld1q_u16_x3(a: *const u16) -> uint16x8x3_t; - unsafe fn vld1q_u32_x3(a: *const u32) -> uint32x4x3_t; - unsafe fn vld1q_u64_x3(a: *const u64) -> uint64x2x3_t; - unsafe fn vld1_u8_x4(a: *const u8) -> uint8x8x4_t; - unsafe fn vld1_u16_x4(a: *const u16) -> uint16x4x4_t; - unsafe fn vld1_u32_x4(a: *const u32) -> uint32x2x4_t; - unsafe fn vld1_u64_x4(a: *const u64) -> uint64x1x4_t; - unsafe fn vld1q_u8_x4(a: *const u8) 
-> uint8x16x4_t; - unsafe fn vld1q_u16_x4(a: *const u16) -> uint16x8x4_t; - unsafe fn vld1q_u32_x4(a: *const u32) -> uint32x4x4_t; - unsafe fn vld1q_u64_x4(a: *const u64) -> uint64x2x4_t; - unsafe fn vld1_p8_x2(a: *const p8) -> poly8x8x2_t; - unsafe fn vld1_p8_x3(a: *const p8) -> poly8x8x3_t; - unsafe fn vld1_p8_x4(a: *const p8) -> poly8x8x4_t; - unsafe fn vld1q_p8_x2(a: *const p8) -> poly8x16x2_t; - unsafe fn vld1q_p8_x3(a: *const p8) -> poly8x16x3_t; - unsafe fn vld1q_p8_x4(a: *const p8) -> poly8x16x4_t; - unsafe fn vld1_p16_x2(a: *const p16) -> poly16x4x2_t; - unsafe fn vld1_p16_x3(a: *const p16) -> poly16x4x3_t; - unsafe fn vld1_p16_x4(a: *const p16) -> poly16x4x4_t; - unsafe fn vld1q_p16_x2(a: *const p16) -> poly16x8x2_t; - unsafe fn vld1q_p16_x3(a: *const p16) -> poly16x8x3_t; - unsafe fn vld1q_p16_x4(a: *const p16) -> poly16x8x4_t; - unsafe fn vld1_f32_x2(a: *const f32) -> float32x2x2_t; - unsafe fn vld1q_f32_x2(a: *const f32) -> float32x4x2_t; - unsafe fn vld1_f32_x3(a: *const f32) -> float32x2x3_t; - unsafe fn vld1q_f32_x3(a: *const f32) -> float32x4x3_t; - unsafe fn vld1_f32_x4(a: *const f32) -> float32x2x4_t; - unsafe fn vld1q_f32_x4(a: *const f32) -> float32x4x4_t; - unsafe fn vld2_s8(a: *const i8) -> int8x8x2_t; - unsafe fn vld2_s16(a: *const i16) -> int16x4x2_t; - unsafe fn vld2_s32(a: *const i32) -> int32x2x2_t; - unsafe fn vld2q_s8(a: *const i8) -> int8x16x2_t; - unsafe fn vld2q_s16(a: *const i16) -> int16x8x2_t; - unsafe fn vld2q_s32(a: *const i32) -> int32x4x2_t; - unsafe fn vld2_s64(a: *const i64) -> int64x1x2_t; - unsafe fn vld2_u8(a: *const u8) -> uint8x8x2_t; - unsafe fn vld2_u16(a: *const u16) -> uint16x4x2_t; - unsafe fn vld2_u32(a: *const u32) -> uint32x2x2_t; - unsafe fn vld2q_u8(a: *const u8) -> uint8x16x2_t; - unsafe fn vld2q_u16(a: *const u16) -> uint16x8x2_t; - unsafe fn vld2q_u32(a: *const u32) -> uint32x4x2_t; - unsafe fn vld2_p8(a: *const p8) -> poly8x8x2_t; - unsafe fn vld2_p16(a: *const p16) -> poly16x4x2_t; - unsafe fn 
vld2q_p8(a: *const p8) -> poly8x16x2_t; - unsafe fn vld2q_p16(a: *const p16) -> poly16x8x2_t; - unsafe fn vld2_u64(a: *const u64) -> uint64x1x2_t; - unsafe fn vld2_f32(a: *const f32) -> float32x2x2_t; - unsafe fn vld2q_f32(a: *const f32) -> float32x4x2_t; - unsafe fn vld2_dup_s8(a: *const i8) -> int8x8x2_t; - unsafe fn vld2_dup_s16(a: *const i16) -> int16x4x2_t; - unsafe fn vld2_dup_s32(a: *const i32) -> int32x2x2_t; - unsafe fn vld2q_dup_s8(a: *const i8) -> int8x16x2_t; - unsafe fn vld2q_dup_s16(a: *const i16) -> int16x8x2_t; - unsafe fn vld2q_dup_s32(a: *const i32) -> int32x4x2_t; - unsafe fn vld2_dup_s64(a: *const i64) -> int64x1x2_t; - unsafe fn vld2_dup_u8(a: *const u8) -> uint8x8x2_t; - unsafe fn vld2_dup_u16(a: *const u16) -> uint16x4x2_t; - unsafe fn vld2_dup_u32(a: *const u32) -> uint32x2x2_t; - unsafe fn vld2q_dup_u8(a: *const u8) -> uint8x16x2_t; - unsafe fn vld2q_dup_u16(a: *const u16) -> uint16x8x2_t; - unsafe fn vld2q_dup_u32(a: *const u32) -> uint32x4x2_t; - unsafe fn vld2_dup_p8(a: *const p8) -> poly8x8x2_t; - unsafe fn vld2_dup_p16(a: *const p16) -> poly16x4x2_t; - unsafe fn vld2q_dup_p8(a: *const p8) -> poly8x16x2_t; - unsafe fn vld2q_dup_p16(a: *const p16) -> poly16x8x2_t; - unsafe fn vld2_dup_u64(a: *const u64) -> uint64x1x2_t; - unsafe fn vld2_dup_f32(a: *const f32) -> float32x2x2_t; - unsafe fn vld2q_dup_f32(a: *const f32) -> float32x4x2_t; - unsafe fn vld2_lane_s8(a: *const i8, b: int8x8x2_t) -> int8x8x2_t; - unsafe fn vld2_lane_s16(a: *const i16, b: int16x4x2_t) -> int16x4x2_t; - unsafe fn vld2_lane_s32(a: *const i32, b: int32x2x2_t) -> int32x2x2_t; - unsafe fn vld2q_lane_s16(a: *const i16, b: int16x8x2_t) -> int16x8x2_t; - unsafe fn vld2q_lane_s32(a: *const i32, b: int32x4x2_t) -> int32x4x2_t; - unsafe fn vld2_lane_u8(a: *const u8, b: uint8x8x2_t) -> uint8x8x2_t; - unsafe fn vld2_lane_u16(a: *const u16, b: uint16x4x2_t) -> uint16x4x2_t; - unsafe fn vld2_lane_u32(a: *const u32, b: uint32x2x2_t) -> uint32x2x2_t; - unsafe fn vld2q_lane_u16(a: 
*const u16, b: uint16x8x2_t) -> uint16x8x2_t; - unsafe fn vld2q_lane_u32(a: *const u32, b: uint32x4x2_t) -> uint32x4x2_t; - unsafe fn vld2_lane_p8(a: *const p8, b: poly8x8x2_t) -> poly8x8x2_t; - unsafe fn vld2_lane_p16(a: *const p16, b: poly16x4x2_t) -> poly16x4x2_t; - unsafe fn vld2q_lane_p16(a: *const p16, b: poly16x8x2_t) -> poly16x8x2_t; - unsafe fn vld2_lane_f32(a: *const f32, b: float32x2x2_t) -> float32x2x2_t; - unsafe fn vld2q_lane_f32(a: *const f32, b: float32x4x2_t) - -> float32x4x2_t; - unsafe fn vld3_s8(a: *const i8) -> int8x8x3_t; - unsafe fn vld3_s16(a: *const i16) -> int16x4x3_t; - unsafe fn vld3_s32(a: *const i32) -> int32x2x3_t; - unsafe fn vld3q_s8(a: *const i8) -> int8x16x3_t; - unsafe fn vld3q_s16(a: *const i16) -> int16x8x3_t; - unsafe fn vld3q_s32(a: *const i32) -> int32x4x3_t; - unsafe fn vld3_s64(a: *const i64) -> int64x1x3_t; - unsafe fn vld3_u8(a: *const u8) -> uint8x8x3_t; - unsafe fn vld3_u16(a: *const u16) -> uint16x4x3_t; - unsafe fn vld3_u32(a: *const u32) -> uint32x2x3_t; - unsafe fn vld3q_u8(a: *const u8) -> uint8x16x3_t; - unsafe fn vld3q_u16(a: *const u16) -> uint16x8x3_t; - unsafe fn vld3q_u32(a: *const u32) -> uint32x4x3_t; - unsafe fn vld3_p8(a: *const p8) -> poly8x8x3_t; - unsafe fn vld3_p16(a: *const p16) -> poly16x4x3_t; - unsafe fn vld3q_p8(a: *const p8) -> poly8x16x3_t; - unsafe fn vld3q_p16(a: *const p16) -> poly16x8x3_t; - unsafe fn vld3_u64(a: *const u64) -> uint64x1x3_t; - unsafe fn vld3_f32(a: *const f32) -> float32x2x3_t; - unsafe fn vld3q_f32(a: *const f32) -> float32x4x3_t; - unsafe fn vld3_dup_s8(a: *const i8) -> int8x8x3_t; - unsafe fn vld3_dup_s16(a: *const i16) -> int16x4x3_t; - unsafe fn vld3_dup_s32(a: *const i32) -> int32x2x3_t; - unsafe fn vld3q_dup_s8(a: *const i8) -> int8x16x3_t; - unsafe fn vld3q_dup_s16(a: *const i16) -> int16x8x3_t; - unsafe fn vld3q_dup_s32(a: *const i32) -> int32x4x3_t; - unsafe fn vld3_dup_s64(a: *const i64) -> int64x1x3_t; - unsafe fn vld3_dup_u8(a: *const u8) -> uint8x8x3_t; - 
unsafe fn vld3_dup_u16(a: *const u16) -> uint16x4x3_t; - unsafe fn vld3_dup_u32(a: *const u32) -> uint32x2x3_t; - unsafe fn vld3q_dup_u8(a: *const u8) -> uint8x16x3_t; - unsafe fn vld3q_dup_u16(a: *const u16) -> uint16x8x3_t; - unsafe fn vld3q_dup_u32(a: *const u32) -> uint32x4x3_t; - unsafe fn vld3_dup_p8(a: *const p8) -> poly8x8x3_t; - unsafe fn vld3_dup_p16(a: *const p16) -> poly16x4x3_t; - unsafe fn vld3q_dup_p8(a: *const p8) -> poly8x16x3_t; - unsafe fn vld3q_dup_p16(a: *const p16) -> poly16x8x3_t; - unsafe fn vld3_dup_u64(a: *const u64) -> uint64x1x3_t; - unsafe fn vld3_dup_f32(a: *const f32) -> float32x2x3_t; - unsafe fn vld3q_dup_f32(a: *const f32) -> float32x4x3_t; - unsafe fn vld3_lane_s8(a: *const i8, b: int8x8x3_t) -> int8x8x3_t; - unsafe fn vld3_lane_s16(a: *const i16, b: int16x4x3_t) -> int16x4x3_t; - unsafe fn vld3_lane_s32(a: *const i32, b: int32x2x3_t) -> int32x2x3_t; - unsafe fn vld3q_lane_s16(a: *const i16, b: int16x8x3_t) -> int16x8x3_t; - unsafe fn vld3q_lane_s32(a: *const i32, b: int32x4x3_t) -> int32x4x3_t; - unsafe fn vld3_lane_u8(a: *const u8, b: uint8x8x3_t) -> uint8x8x3_t; - unsafe fn vld3_lane_u16(a: *const u16, b: uint16x4x3_t) -> uint16x4x3_t; - unsafe fn vld3_lane_u32(a: *const u32, b: uint32x2x3_t) -> uint32x2x3_t; - unsafe fn vld3q_lane_u16(a: *const u16, b: uint16x8x3_t) -> uint16x8x3_t; - unsafe fn vld3q_lane_u32(a: *const u32, b: uint32x4x3_t) -> uint32x4x3_t; - unsafe fn vld3_lane_p8(a: *const p8, b: poly8x8x3_t) -> poly8x8x3_t; - unsafe fn vld3_lane_p16(a: *const p16, b: poly16x4x3_t) -> poly16x4x3_t; - unsafe fn vld3q_lane_p16(a: *const p16, b: poly16x8x3_t) -> poly16x8x3_t; - unsafe fn vld3_lane_f32(a: *const f32, b: float32x2x3_t) -> float32x2x3_t; - unsafe fn vld3q_lane_f32(a: *const f32, b: float32x4x3_t) - -> float32x4x3_t; - unsafe fn vld4_s8(a: *const i8) -> int8x8x4_t; - unsafe fn vld4_s16(a: *const i16) -> int16x4x4_t; - unsafe fn vld4_s32(a: *const i32) -> int32x2x4_t; - unsafe fn vld4q_s8(a: *const i8) -> 
int8x16x4_t; - unsafe fn vld4q_s16(a: *const i16) -> int16x8x4_t; - unsafe fn vld4q_s32(a: *const i32) -> int32x4x4_t; - unsafe fn vld4_s64(a: *const i64) -> int64x1x4_t; - unsafe fn vld4_u8(a: *const u8) -> uint8x8x4_t; - unsafe fn vld4_u16(a: *const u16) -> uint16x4x4_t; - unsafe fn vld4_u32(a: *const u32) -> uint32x2x4_t; - unsafe fn vld4q_u8(a: *const u8) -> uint8x16x4_t; - unsafe fn vld4q_u16(a: *const u16) -> uint16x8x4_t; - unsafe fn vld4q_u32(a: *const u32) -> uint32x4x4_t; - unsafe fn vld4_p8(a: *const p8) -> poly8x8x4_t; - unsafe fn vld4_p16(a: *const p16) -> poly16x4x4_t; - unsafe fn vld4q_p8(a: *const p8) -> poly8x16x4_t; - unsafe fn vld4q_p16(a: *const p16) -> poly16x8x4_t; - unsafe fn vld4_u64(a: *const u64) -> uint64x1x4_t; - unsafe fn vld4_f32(a: *const f32) -> float32x2x4_t; - unsafe fn vld4q_f32(a: *const f32) -> float32x4x4_t; - unsafe fn vld4_dup_s8(a: *const i8) -> int8x8x4_t; - unsafe fn vld4_dup_s16(a: *const i16) -> int16x4x4_t; - unsafe fn vld4_dup_s32(a: *const i32) -> int32x2x4_t; - unsafe fn vld4q_dup_s8(a: *const i8) -> int8x16x4_t; - unsafe fn vld4q_dup_s16(a: *const i16) -> int16x8x4_t; - unsafe fn vld4q_dup_s32(a: *const i32) -> int32x4x4_t; - unsafe fn vld4_dup_s64(a: *const i64) -> int64x1x4_t; - unsafe fn vld4_dup_u8(a: *const u8) -> uint8x8x4_t; - unsafe fn vld4_dup_u16(a: *const u16) -> uint16x4x4_t; - unsafe fn vld4_dup_u32(a: *const u32) -> uint32x2x4_t; - unsafe fn vld4q_dup_u8(a: *const u8) -> uint8x16x4_t; - unsafe fn vld4q_dup_u16(a: *const u16) -> uint16x8x4_t; - unsafe fn vld4q_dup_u32(a: *const u32) -> uint32x4x4_t; - unsafe fn vld4_dup_p8(a: *const p8) -> poly8x8x4_t; - unsafe fn vld4_dup_p16(a: *const p16) -> poly16x4x4_t; - unsafe fn vld4q_dup_p8(a: *const p8) -> poly8x16x4_t; - unsafe fn vld4q_dup_p16(a: *const p16) -> poly16x8x4_t; - unsafe fn vld4_dup_u64(a: *const u64) -> uint64x1x4_t; - unsafe fn vld4_dup_f32(a: *const f32) -> float32x2x4_t; - unsafe fn vld4q_dup_f32(a: *const f32) -> float32x4x4_t; - unsafe fn 
vld4_lane_s8(a: *const i8, b: int8x8x4_t) -> int8x8x4_t; - unsafe fn vld4_lane_s16(a: *const i16, b: int16x4x4_t) -> int16x4x4_t; - unsafe fn vld4_lane_s32(a: *const i32, b: int32x2x4_t) -> int32x2x4_t; - unsafe fn vld4q_lane_s16(a: *const i16, b: int16x8x4_t) -> int16x8x4_t; - unsafe fn vld4q_lane_s32(a: *const i32, b: int32x4x4_t) -> int32x4x4_t; - unsafe fn vld4_lane_u8(a: *const u8, b: uint8x8x4_t) -> uint8x8x4_t; - unsafe fn vld4_lane_u16(a: *const u16, b: uint16x4x4_t) -> uint16x4x4_t; - unsafe fn vld4_lane_u32(a: *const u32, b: uint32x2x4_t) -> uint32x2x4_t; - unsafe fn vld4q_lane_u16(a: *const u16, b: uint16x8x4_t) -> uint16x8x4_t; - unsafe fn vld4q_lane_u32(a: *const u32, b: uint32x4x4_t) -> uint32x4x4_t; - unsafe fn vld4_lane_p8(a: *const p8, b: poly8x8x4_t) -> poly8x8x4_t; - unsafe fn vld4_lane_p16(a: *const p16, b: poly16x4x4_t) -> poly16x4x4_t; - unsafe fn vld4q_lane_p16(a: *const p16, b: poly16x8x4_t) -> poly16x8x4_t; - unsafe fn vld4_lane_f32(a: *const f32, b: float32x2x4_t) -> float32x2x4_t; - unsafe fn vld4q_lane_f32(a: *const f32, b: float32x4x4_t) - -> float32x4x4_t; - unsafe fn vst1_lane_s8(a: *mut i8, b: int8x8_t); - unsafe fn vst1_lane_s16(a: *mut i16, b: int16x4_t); - unsafe fn vst1_lane_s32(a: *mut i32, b: int32x2_t); - unsafe fn vst1_lane_s64(a: *mut i64, b: int64x1_t); - unsafe fn vst1q_lane_s8(a: *mut i8, b: int8x16_t); - unsafe fn vst1q_lane_s16(a: *mut i16, b: int16x8_t); - unsafe fn vst1q_lane_s32(a: *mut i32, b: int32x4_t); - unsafe fn vst1q_lane_s64(a: *mut i64, b: int64x2_t); - unsafe fn vst1_lane_u8(a: *mut u8, b: uint8x8_t); - unsafe fn vst1_lane_u16(a: *mut u16, b: uint16x4_t); - unsafe fn vst1_lane_u32(a: *mut u32, b: uint32x2_t); - unsafe fn vst1_lane_u64(a: *mut u64, b: uint64x1_t); - unsafe fn vst1q_lane_u8(a: *mut u8, b: uint8x16_t); - unsafe fn vst1q_lane_u16(a: *mut u16, b: uint16x8_t); - unsafe fn vst1q_lane_u32(a: *mut u32, b: uint32x4_t); - unsafe fn vst1q_lane_u64(a: *mut u64, b: uint64x2_t); - unsafe fn 
vst1_lane_p8(a: *mut p8, b: poly8x8_t); - unsafe fn vst1_lane_p16(a: *mut p16, b: poly16x4_t); - unsafe fn vst1q_lane_p8(a: *mut p8, b: poly8x16_t); - unsafe fn vst1q_lane_p16(a: *mut p16, b: poly16x8_t); - unsafe fn vst1q_lane_p64(a: *mut p64, b: poly64x2_t); - unsafe fn vst1_lane_f32(a: *mut f32, b: float32x2_t); - unsafe fn vst1q_lane_f32(a: *mut f32, b: float32x4_t); - unsafe fn vst1_s8_x2(a: *mut i8, b: int8x8x2_t); - unsafe fn vst1_s16_x2(a: *mut i16, b: int16x4x2_t); - unsafe fn vst1_s32_x2(a: *mut i32, b: int32x2x2_t); - unsafe fn vst1_s64_x2(a: *mut i64, b: int64x1x2_t); - unsafe fn vst1q_s8_x2(a: *mut i8, b: int8x16x2_t); - unsafe fn vst1q_s16_x2(a: *mut i16, b: int16x8x2_t); - unsafe fn vst1q_s32_x2(a: *mut i32, b: int32x4x2_t); - unsafe fn vst1q_s64_x2(a: *mut i64, b: int64x2x2_t); - unsafe fn vst1_s8_x3(a: *mut i8, b: int8x8x3_t); - unsafe fn vst1_s16_x3(a: *mut i16, b: int16x4x3_t); - unsafe fn vst1_s32_x3(a: *mut i32, b: int32x2x3_t); - unsafe fn vst1_s64_x3(a: *mut i64, b: int64x1x3_t); - unsafe fn vst1q_s8_x3(a: *mut i8, b: int8x16x3_t); - unsafe fn vst1q_s16_x3(a: *mut i16, b: int16x8x3_t); - unsafe fn vst1q_s32_x3(a: *mut i32, b: int32x4x3_t); - unsafe fn vst1q_s64_x3(a: *mut i64, b: int64x2x3_t); - unsafe fn vst1_s8_x4(a: *mut i8, b: int8x8x4_t); - unsafe fn vst1_s16_x4(a: *mut i16, b: int16x4x4_t); - unsafe fn vst1_s32_x4(a: *mut i32, b: int32x2x4_t); - unsafe fn vst1_s64_x4(a: *mut i64, b: int64x1x4_t); - unsafe fn vst1q_s8_x4(a: *mut i8, b: int8x16x4_t); - unsafe fn vst1q_s16_x4(a: *mut i16, b: int16x8x4_t); - unsafe fn vst1q_s32_x4(a: *mut i32, b: int32x4x4_t); - unsafe fn vst1q_s64_x4(a: *mut i64, b: int64x2x4_t); - unsafe fn vst1_u8_x2(a: *mut u8, b: uint8x8x2_t); - unsafe fn vst1_u16_x2(a: *mut u16, b: uint16x4x2_t); - unsafe fn vst1_u32_x2(a: *mut u32, b: uint32x2x2_t); - unsafe fn vst1_u64_x2(a: *mut u64, b: uint64x1x2_t); - unsafe fn vst1q_u8_x2(a: *mut u8, b: uint8x16x2_t); - unsafe fn vst1q_u16_x2(a: *mut u16, b: uint16x8x2_t); - 
unsafe fn vst1q_u32_x2(a: *mut u32, b: uint32x4x2_t); - unsafe fn vst1q_u64_x2(a: *mut u64, b: uint64x2x2_t); - unsafe fn vst1_u8_x3(a: *mut u8, b: uint8x8x3_t); - unsafe fn vst1_u16_x3(a: *mut u16, b: uint16x4x3_t); - unsafe fn vst1_u32_x3(a: *mut u32, b: uint32x2x3_t); - unsafe fn vst1_u64_x3(a: *mut u64, b: uint64x1x3_t); - unsafe fn vst1q_u8_x3(a: *mut u8, b: uint8x16x3_t); - unsafe fn vst1q_u16_x3(a: *mut u16, b: uint16x8x3_t); - unsafe fn vst1q_u32_x3(a: *mut u32, b: uint32x4x3_t); - unsafe fn vst1q_u64_x3(a: *mut u64, b: uint64x2x3_t); - unsafe fn vst1_u8_x4(a: *mut u8, b: uint8x8x4_t); - unsafe fn vst1_u16_x4(a: *mut u16, b: uint16x4x4_t); - unsafe fn vst1_u32_x4(a: *mut u32, b: uint32x2x4_t); - unsafe fn vst1_u64_x4(a: *mut u64, b: uint64x1x4_t); - unsafe fn vst1q_u8_x4(a: *mut u8, b: uint8x16x4_t); - unsafe fn vst1q_u16_x4(a: *mut u16, b: uint16x8x4_t); - unsafe fn vst1q_u32_x4(a: *mut u32, b: uint32x4x4_t); - unsafe fn vst1q_u64_x4(a: *mut u64, b: uint64x2x4_t); - unsafe fn vst1_p8_x2(a: *mut p8, b: poly8x8x2_t); - unsafe fn vst1_p8_x3(a: *mut p8, b: poly8x8x3_t); - unsafe fn vst1_p8_x4(a: *mut p8, b: poly8x8x4_t); - unsafe fn vst1q_p8_x2(a: *mut p8, b: poly8x16x2_t); - unsafe fn vst1q_p8_x3(a: *mut p8, b: poly8x16x3_t); - unsafe fn vst1q_p8_x4(a: *mut p8, b: poly8x16x4_t); - unsafe fn vst1_p16_x2(a: *mut p16, b: poly16x4x2_t); - unsafe fn vst1_p16_x3(a: *mut p16, b: poly16x4x3_t); - unsafe fn vst1_p16_x4(a: *mut p16, b: poly16x4x4_t); - unsafe fn vst1q_p16_x2(a: *mut p16, b: poly16x8x2_t); - unsafe fn vst1q_p16_x3(a: *mut p16, b: poly16x8x3_t); - unsafe fn vst1q_p16_x4(a: *mut p16, b: poly16x8x4_t); - unsafe fn vst1_f32_x2(a: *mut f32, b: float32x2x2_t); - unsafe fn vst1q_f32_x2(a: *mut f32, b: float32x4x2_t); - unsafe fn vst1_f32_x3(a: *mut f32, b: float32x2x3_t); - unsafe fn vst1q_f32_x3(a: *mut f32, b: float32x4x3_t); - unsafe fn vst1_f32_x4(a: *mut f32, b: float32x2x4_t); - unsafe fn vst1q_f32_x4(a: *mut f32, b: float32x4x4_t); - unsafe fn 
vst2_s8(a: *mut i8, b: int8x8x2_t); - unsafe fn vst2_s16(a: *mut i16, b: int16x4x2_t); - unsafe fn vst2_s32(a: *mut i32, b: int32x2x2_t); - unsafe fn vst2q_s8(a: *mut i8, b: int8x16x2_t); - unsafe fn vst2q_s16(a: *mut i16, b: int16x8x2_t); - unsafe fn vst2q_s32(a: *mut i32, b: int32x4x2_t); - unsafe fn vst2_s64(a: *mut i64, b: int64x1x2_t); - unsafe fn vst2_u8(a: *mut u8, b: uint8x8x2_t); - unsafe fn vst2_u16(a: *mut u16, b: uint16x4x2_t); - unsafe fn vst2_u32(a: *mut u32, b: uint32x2x2_t); - unsafe fn vst2q_u8(a: *mut u8, b: uint8x16x2_t); - unsafe fn vst2q_u16(a: *mut u16, b: uint16x8x2_t); - unsafe fn vst2q_u32(a: *mut u32, b: uint32x4x2_t); - unsafe fn vst2_p8(a: *mut p8, b: poly8x8x2_t); - unsafe fn vst2_p16(a: *mut p16, b: poly16x4x2_t); - unsafe fn vst2q_p8(a: *mut p8, b: poly8x16x2_t); - unsafe fn vst2q_p16(a: *mut p16, b: poly16x8x2_t); - unsafe fn vst2_u64(a: *mut u64, b: uint64x1x2_t); - unsafe fn vst2_f32(a: *mut f32, b: float32x2x2_t); - unsafe fn vst2q_f32(a: *mut f32, b: float32x4x2_t); - unsafe fn vst2_lane_s8(a: *mut i8, b: int8x8x2_t); - unsafe fn vst2_lane_s16(a: *mut i16, b: int16x4x2_t); - unsafe fn vst2_lane_s32(a: *mut i32, b: int32x2x2_t); - unsafe fn vst2q_lane_s16(a: *mut i16, b: int16x8x2_t); - unsafe fn vst2q_lane_s32(a: *mut i32, b: int32x4x2_t); - unsafe fn vst2_lane_u8(a: *mut u8, b: uint8x8x2_t); - unsafe fn vst2_lane_u16(a: *mut u16, b: uint16x4x2_t); - unsafe fn vst2_lane_u32(a: *mut u32, b: uint32x2x2_t); - unsafe fn vst2q_lane_u16(a: *mut u16, b: uint16x8x2_t); - unsafe fn vst2q_lane_u32(a: *mut u32, b: uint32x4x2_t); - unsafe fn vst2_lane_p8(a: *mut p8, b: poly8x8x2_t); - unsafe fn vst2_lane_p16(a: *mut p16, b: poly16x4x2_t); - unsafe fn vst2q_lane_p16(a: *mut p16, b: poly16x8x2_t); - unsafe fn vst2_lane_f32(a: *mut f32, b: float32x2x2_t); - unsafe fn vst2q_lane_f32(a: *mut f32, b: float32x4x2_t); - unsafe fn vst3_s8(a: *mut i8, b: int8x8x3_t); - unsafe fn vst3_s16(a: *mut i16, b: int16x4x3_t); - unsafe fn vst3_s32(a: *mut i32, 
b: int32x2x3_t); - unsafe fn vst3q_s8(a: *mut i8, b: int8x16x3_t); - unsafe fn vst3q_s16(a: *mut i16, b: int16x8x3_t); - unsafe fn vst3q_s32(a: *mut i32, b: int32x4x3_t); - unsafe fn vst3_s64(a: *mut i64, b: int64x1x3_t); - unsafe fn vst3_u8(a: *mut u8, b: uint8x8x3_t); - unsafe fn vst3_u16(a: *mut u16, b: uint16x4x3_t); - unsafe fn vst3_u32(a: *mut u32, b: uint32x2x3_t); - unsafe fn vst3q_u8(a: *mut u8, b: uint8x16x3_t); - unsafe fn vst3q_u16(a: *mut u16, b: uint16x8x3_t); - unsafe fn vst3q_u32(a: *mut u32, b: uint32x4x3_t); - unsafe fn vst3_p8(a: *mut p8, b: poly8x8x3_t); - unsafe fn vst3_p16(a: *mut p16, b: poly16x4x3_t); - unsafe fn vst3q_p8(a: *mut p8, b: poly8x16x3_t); - unsafe fn vst3q_p16(a: *mut p16, b: poly16x8x3_t); - unsafe fn vst3_u64(a: *mut u64, b: uint64x1x3_t); - unsafe fn vst3_f32(a: *mut f32, b: float32x2x3_t); - unsafe fn vst3q_f32(a: *mut f32, b: float32x4x3_t); - unsafe fn vst3_lane_s8(a: *mut i8, b: int8x8x3_t); - unsafe fn vst3_lane_s16(a: *mut i16, b: int16x4x3_t); - unsafe fn vst3_lane_s32(a: *mut i32, b: int32x2x3_t); - unsafe fn vst3q_lane_s16(a: *mut i16, b: int16x8x3_t); - unsafe fn vst3q_lane_s32(a: *mut i32, b: int32x4x3_t); - unsafe fn vst3_lane_u8(a: *mut u8, b: uint8x8x3_t); - unsafe fn vst3_lane_u16(a: *mut u16, b: uint16x4x3_t); - unsafe fn vst3_lane_u32(a: *mut u32, b: uint32x2x3_t); - unsafe fn vst3q_lane_u16(a: *mut u16, b: uint16x8x3_t); - unsafe fn vst3q_lane_u32(a: *mut u32, b: uint32x4x3_t); - unsafe fn vst3_lane_p8(a: *mut p8, b: poly8x8x3_t); - unsafe fn vst3_lane_p16(a: *mut p16, b: poly16x4x3_t); - unsafe fn vst3q_lane_p16(a: *mut p16, b: poly16x8x3_t); - unsafe fn vst3_lane_f32(a: *mut f32, b: float32x2x3_t); - unsafe fn vst3q_lane_f32(a: *mut f32, b: float32x4x3_t); - unsafe fn vst4_s8(a: *mut i8, b: int8x8x4_t); - unsafe fn vst4_s16(a: *mut i16, b: int16x4x4_t); - unsafe fn vst4_s32(a: *mut i32, b: int32x2x4_t); - unsafe fn vst4q_s8(a: *mut i8, b: int8x16x4_t); - unsafe fn vst4q_s16(a: *mut i16, b: int16x8x4_t); - 
unsafe fn vst4q_s32(a: *mut i32, b: int32x4x4_t); - unsafe fn vst4_s64(a: *mut i64, b: int64x1x4_t); - unsafe fn vst4_u8(a: *mut u8, b: uint8x8x4_t); - unsafe fn vst4_u16(a: *mut u16, b: uint16x4x4_t); - unsafe fn vst4_u32(a: *mut u32, b: uint32x2x4_t); - unsafe fn vst4q_u8(a: *mut u8, b: uint8x16x4_t); - unsafe fn vst4q_u16(a: *mut u16, b: uint16x8x4_t); - unsafe fn vst4q_u32(a: *mut u32, b: uint32x4x4_t); - unsafe fn vst4_p8(a: *mut p8, b: poly8x8x4_t); - unsafe fn vst4_p16(a: *mut p16, b: poly16x4x4_t); - unsafe fn vst4q_p8(a: *mut p8, b: poly8x16x4_t); - unsafe fn vst4q_p16(a: *mut p16, b: poly16x8x4_t); - unsafe fn vst4_u64(a: *mut u64, b: uint64x1x4_t); - unsafe fn vst4_f32(a: *mut f32, b: float32x2x4_t); - unsafe fn vst4q_f32(a: *mut f32, b: float32x4x4_t); - unsafe fn vst4_lane_s8(a: *mut i8, b: int8x8x4_t); - unsafe fn vst4_lane_s16(a: *mut i16, b: int16x4x4_t); - unsafe fn vst4_lane_s32(a: *mut i32, b: int32x2x4_t); - unsafe fn vst4q_lane_s16(a: *mut i16, b: int16x8x4_t); - unsafe fn vst4q_lane_s32(a: *mut i32, b: int32x4x4_t); - unsafe fn vst4_lane_u8(a: *mut u8, b: uint8x8x4_t); - unsafe fn vst4_lane_u16(a: *mut u16, b: uint16x4x4_t); - unsafe fn vst4_lane_u32(a: *mut u32, b: uint32x2x4_t); - unsafe fn vst4q_lane_u16(a: *mut u16, b: uint16x8x4_t); - unsafe fn vst4q_lane_u32(a: *mut u32, b: uint32x4x4_t); - unsafe fn vst4_lane_p8(a: *mut p8, b: poly8x8x4_t); - unsafe fn vst4_lane_p16(a: *mut p16, b: poly16x4x4_t); - unsafe fn vst4q_lane_p16(a: *mut p16, b: poly16x8x4_t); - unsafe fn vst4_lane_f32(a: *mut f32, b: float32x2x4_t); - unsafe fn vst4q_lane_f32(a: *mut f32, b: float32x4x4_t); - fn vmul_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; - fn vmulq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; - fn vmul_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - fn vmulq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; - fn vmul_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; - fn vmulq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; - fn vmul_u8(a: uint8x8_t, b: uint8x8_t) 
-> uint8x8_t; - fn vmulq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; - fn vmul_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; - fn vmulq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; - fn vmul_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; - fn vmulq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; - fn vmul_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t; - fn vmulq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t; - fn vmul_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; - fn vmulq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; - fn vmul_n_s16(a: int16x4_t, b: i16) -> int16x4_t; - fn vmulq_n_s16(a: int16x8_t, b: i16) -> int16x8_t; - fn vmul_n_s32(a: int32x2_t, b: i32) -> int32x2_t; - fn vmulq_n_s32(a: int32x4_t, b: i32) -> int32x4_t; - fn vmul_n_u16(a: uint16x4_t, b: u16) -> uint16x4_t; - fn vmulq_n_u16(a: uint16x8_t, b: u16) -> uint16x8_t; - fn vmul_n_u32(a: uint32x2_t, b: u32) -> uint32x2_t; - fn vmulq_n_u32(a: uint32x4_t, b: u32) -> uint32x4_t; - fn vmul_n_f32(a: float32x2_t, b: f32) -> float32x2_t; - fn vmulq_n_f32(a: float32x4_t, b: f32) -> float32x4_t; - fn vmul_lane_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - fn vmul_laneq_s16(a: int16x4_t, b: int16x8_t) -> int16x4_t; - fn vmulq_lane_s16(a: int16x8_t, b: int16x4_t) -> int16x8_t; - fn vmulq_laneq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; - fn vmul_lane_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; - fn vmul_laneq_s32(a: int32x2_t, b: int32x4_t) -> int32x2_t; - fn vmulq_lane_s32(a: int32x4_t, b: int32x2_t) -> int32x4_t; - fn vmulq_laneq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; - fn vmul_lane_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; - fn vmul_laneq_u16(a: uint16x4_t, b: uint16x8_t) -> uint16x4_t; - fn vmulq_lane_u16(a: uint16x8_t, b: uint16x4_t) -> uint16x8_t; - fn vmulq_laneq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; - fn vmul_lane_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; - fn vmul_laneq_u32(a: uint32x2_t, b: uint32x4_t) -> uint32x2_t; - fn vmulq_lane_u32(a: 
uint32x4_t, b: uint32x2_t) -> uint32x4_t; - fn vmulq_laneq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; - fn vmul_lane_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; - fn vmul_laneq_f32(a: float32x2_t, b: float32x4_t) -> float32x2_t; - fn vmulq_lane_f32(a: float32x4_t, b: float32x2_t) -> float32x4_t; - fn vmulq_laneq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; - fn vmull_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t; - fn vmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t; - fn vmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t; - fn vmull_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t; - fn vmull_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t; - fn vmull_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t; - fn vmull_p8(a: poly8x8_t, b: poly8x8_t) -> poly16x8_t; - fn vmull_n_s16(a: int16x4_t, b: i16) -> int32x4_t; - fn vmull_n_s32(a: int32x2_t, b: i32) -> int64x2_t; - fn vmull_n_u16(a: uint16x4_t, b: u16) -> uint32x4_t; - fn vmull_n_u32(a: uint32x2_t, b: u32) -> uint64x2_t; - fn vmull_lane_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t; - fn vmull_laneq_s16(a: int16x4_t, b: int16x8_t) -> int32x4_t; - fn vmull_lane_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t; - fn vmull_laneq_s32(a: int32x2_t, b: int32x4_t) -> int64x2_t; - fn vmull_lane_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t; - fn vmull_laneq_u16(a: uint16x4_t, b: uint16x8_t) -> uint32x4_t; - fn vmull_lane_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t; - fn vmull_laneq_u32(a: uint32x2_t, b: uint32x4_t) -> uint64x2_t; - fn vfma_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t; - fn vfmaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t; - fn vfma_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t; - fn vfmaq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t; - fn vfms_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t; - fn vfmsq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t; - fn vfms_n_f32(a: 
float32x2_t, b: float32x2_t, c: f32) -> float32x2_t; - fn vfmsq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t; - fn vsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; - fn vsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; - fn vsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - fn vsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; - fn vsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; - fn vsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; - fn vsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; - fn vsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; - fn vsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; - fn vsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; - fn vsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; - fn vsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; - fn vsub_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t; - fn vsubq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t; - fn vsub_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t; - fn vsubq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t; - fn vsub_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; - fn vsubq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; - fn vadd_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t; - fn vadd_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t; - fn vaddq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t; - fn vaddq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t; - fn vadd_p64(a: poly64x1_t, b: poly64x1_t) -> poly64x1_t; - fn vaddq_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t; - fn vaddq_p128(a: p128, b: p128) -> p128; - fn vsubhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t; - fn vsubhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t; - fn vsubhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t; - fn vsubhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t; - fn vsubhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t; - fn vsubhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t; - fn vsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t; - fn 
vsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t; - fn vsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t; - fn vsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t; - fn vsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t; - fn vsubhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t; - fn vhsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; - fn vhsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; - fn vhsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; - fn vhsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; - fn vhsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; - fn vhsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; - fn vhsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; - fn vhsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; - fn vhsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - fn vhsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; - fn vhsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; - fn vhsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; - fn vsubw_s8(a: int16x8_t, b: int8x8_t) -> int16x8_t; - fn vsubw_s16(a: int32x4_t, b: int16x4_t) -> int32x4_t; - fn vsubw_s32(a: int64x2_t, b: int32x2_t) -> int64x2_t; - fn vsubw_u8(a: uint16x8_t, b: uint8x8_t) -> uint16x8_t; - fn vsubw_u16(a: uint32x4_t, b: uint16x4_t) -> uint32x4_t; - fn vsubw_u32(a: uint64x2_t, b: uint32x2_t) -> uint64x2_t; - fn vsubl_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t; - fn vsubl_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t; - fn vsubl_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t; - fn vsubl_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t; - fn vsubl_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t; - fn vsubl_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t; - fn vmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; - fn vmaxq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; - fn vmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - fn vmaxq_s16(a: int16x8_t, b: int16x8_t) -> 
int16x8_t; - fn vmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; - fn vmaxq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; - fn vmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; - fn vmaxq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; - fn vmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; - fn vmaxq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; - fn vmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; - fn vmaxq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; - fn vmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; - fn vmaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; - fn vmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; - fn vmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; - fn vmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; - fn vminq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; - fn vmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - fn vminq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; - fn vmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; - fn vminq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; - fn vmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; - fn vminq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; - fn vmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; - fn vminq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; - fn vmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; - fn vminq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; - fn vmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; - fn vminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; - fn vminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; - fn vminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; - fn vpadd_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; - fn vqdmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t; - fn vqdmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t; - fn vqdmull_n_s16(a: int16x4_t, b: i16) -> int32x4_t; - fn vqdmull_n_s32(a: int32x2_t, b: i32) -> int64x2_t; - fn vqdmull_lane_s16(a: int16x4_t, b: 
int16x4_t) -> int32x4_t; - fn vqdmull_lane_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t; - fn vqdmlal_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t; - fn vqdmlal_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t; - fn vqdmlal_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t; - fn vqdmlal_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t; - fn vqdmlal_lane_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t; - fn vqdmlal_lane_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t; - fn vqdmlsl_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t; - fn vqdmlsl_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t; - fn vqdmlsl_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t; - fn vqdmlsl_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t; - fn vqdmlsl_lane_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t; - fn vqdmlsl_lane_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t; - fn vqdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - fn vqdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; - fn vqdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; - fn vqdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; - fn vqdmulh_n_s16(a: int16x4_t, b: i16) -> int16x4_t; - fn vqdmulh_n_s32(a: int32x2_t, b: i32) -> int32x2_t; - fn vqdmulhq_n_s16(a: int16x8_t, b: i16) -> int16x8_t; - fn vqdmulhq_n_s32(a: int32x4_t, b: i32) -> int32x4_t; - fn vqdmulhq_laneq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; - fn vqdmulh_laneq_s16(a: int16x4_t, b: int16x8_t) -> int16x4_t; - fn vqdmulhq_laneq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; - fn vqdmulh_laneq_s32(a: int32x2_t, b: int32x4_t) -> int32x2_t; - fn vqmovn_s16(a: int16x8_t) -> int8x8_t; - fn vqmovn_s32(a: int32x4_t) -> int16x4_t; - fn vqmovn_s64(a: int64x2_t) -> int32x2_t; - fn vqmovn_u16(a: uint16x8_t) -> uint8x8_t; - fn vqmovn_u32(a: uint32x4_t) -> uint16x4_t; - fn vqmovn_u64(a: uint64x2_t) -> uint32x2_t; - fn vqmovun_s16(a: int16x8_t) -> 
uint8x8_t; - fn vqmovun_s32(a: int32x4_t) -> uint16x4_t; - fn vqmovun_s64(a: int64x2_t) -> uint32x2_t; - fn vqrdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - fn vqrdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; - fn vqrdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; - fn vqrdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; - fn vqrdmulh_n_s16(a: int16x4_t, b: i16) -> int16x4_t; - fn vqrdmulhq_n_s16(a: int16x8_t, b: i16) -> int16x8_t; - fn vqrdmulh_n_s32(a: int32x2_t, b: i32) -> int32x2_t; - fn vqrdmulhq_n_s32(a: int32x4_t, b: i32) -> int32x4_t; - fn vqrdmulh_lane_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - fn vqrdmulh_laneq_s16(a: int16x4_t, b: int16x8_t) -> int16x4_t; - fn vqrdmulhq_lane_s16(a: int16x8_t, b: int16x4_t) -> int16x8_t; - fn vqrdmulhq_laneq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; - fn vqrdmulh_lane_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; - fn vqrdmulh_laneq_s32(a: int32x2_t, b: int32x4_t) -> int32x2_t; - fn vqrdmulhq_lane_s32(a: int32x4_t, b: int32x2_t) -> int32x4_t; - fn vqrdmulhq_laneq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; - fn vqrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; - fn vqrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; - fn vqrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - fn vqrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; - fn vqrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; - fn vqrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; - fn vqrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t; - fn vqrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t; - fn vqrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t; - fn vqrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t; - fn vqrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t; - fn vqrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t; - fn vqrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t; - fn vqrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t; - fn vqrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t; - fn vqrshlq_u64(a: 
uint64x2_t, b: int64x2_t) -> uint64x2_t; - fn vqrshrn_n_s16(a: int16x8_t) -> int8x8_t; - fn vqrshrn_n_s32(a: int32x4_t) -> int16x4_t; - fn vqrshrn_n_s64(a: int64x2_t) -> int32x2_t; - fn vqrshrn_n_u16(a: uint16x8_t) -> uint8x8_t; - fn vqrshrn_n_u32(a: uint32x4_t) -> uint16x4_t; - fn vqrshrn_n_u64(a: uint64x2_t) -> uint32x2_t; - fn vqrshrun_n_s16(a: int16x8_t) -> uint8x8_t; - fn vqrshrun_n_s32(a: int32x4_t) -> uint16x4_t; - fn vqrshrun_n_s64(a: int64x2_t) -> uint32x2_t; - fn vqshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; - fn vqshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; - fn vqshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - fn vqshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; - fn vqshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; - fn vqshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; - fn vqshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t; - fn vqshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t; - fn vqshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t; - fn vqshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t; - fn vqshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t; - fn vqshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t; - fn vqshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t; - fn vqshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t; - fn vqshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t; - fn vqshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t; - fn vqshl_n_s8(a: int8x8_t) -> int8x8_t; - fn vqshlq_n_s8(a: int8x16_t) -> int8x16_t; - fn vqshl_n_s16(a: int16x4_t) -> int16x4_t; - fn vqshlq_n_s16(a: int16x8_t) -> int16x8_t; - fn vqshl_n_s32(a: int32x2_t) -> int32x2_t; - fn vqshlq_n_s32(a: int32x4_t) -> int32x4_t; - fn vqshl_n_s64(a: int64x1_t) -> int64x1_t; - fn vqshlq_n_s64(a: int64x2_t) -> int64x2_t; - fn vqshl_n_u8(a: uint8x8_t) -> uint8x8_t; - fn vqshlq_n_u8(a: uint8x16_t) -> uint8x16_t; - fn vqshl_n_u16(a: uint16x4_t) -> uint16x4_t; - fn vqshlq_n_u16(a: uint16x8_t) -> uint16x8_t; - fn vqshl_n_u32(a: uint32x2_t) -> uint32x2_t; - fn 
vqshlq_n_u32(a: uint32x4_t) -> uint32x4_t; - fn vqshl_n_u64(a: uint64x1_t) -> uint64x1_t; - fn vqshlq_n_u64(a: uint64x2_t) -> uint64x2_t; - fn vqshlu_n_s8(a: int8x8_t) -> uint8x8_t; - fn vqshlu_n_s16(a: int16x4_t) -> uint16x4_t; - fn vqshlu_n_s32(a: int32x2_t) -> uint32x2_t; - fn vqshlu_n_s64(a: int64x1_t) -> uint64x1_t; - fn vqshluq_n_s8(a: int8x16_t) -> uint8x16_t; - fn vqshluq_n_s16(a: int16x8_t) -> uint16x8_t; - fn vqshluq_n_s32(a: int32x4_t) -> uint32x4_t; - fn vqshluq_n_s64(a: int64x2_t) -> uint64x2_t; - fn vqshrn_n_s16(a: int16x8_t) -> int8x8_t; - fn vqshrn_n_s32(a: int32x4_t) -> int16x4_t; - fn vqshrn_n_s64(a: int64x2_t) -> int32x2_t; - fn vqshrn_n_u16(a: uint16x8_t) -> uint8x8_t; - fn vqshrn_n_u32(a: uint32x4_t) -> uint16x4_t; - fn vqshrn_n_u64(a: uint64x2_t) -> uint32x2_t; - fn vqshrun_n_s16(a: int16x8_t) -> uint8x8_t; - fn vqshrun_n_s32(a: int32x4_t) -> uint16x4_t; - fn vqshrun_n_s64(a: int64x2_t) -> uint32x2_t; - fn vrsqrte_f32(a: float32x2_t) -> float32x2_t; - fn vrsqrteq_f32(a: float32x4_t) -> float32x4_t; - fn vrsqrte_u32(a: uint32x2_t) -> uint32x2_t; - fn vrsqrteq_u32(a: uint32x4_t) -> uint32x4_t; - fn vrsqrts_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; - fn vrsqrtsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; - fn vrecpe_f32(a: float32x2_t) -> float32x2_t; - fn vrecpeq_f32(a: float32x4_t) -> float32x4_t; - fn vrecpe_u32(a: uint32x2_t) -> uint32x2_t; - fn vrecpeq_u32(a: uint32x4_t) -> uint32x4_t; - fn vrecps_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; - fn vrecpsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; - fn vreinterpret_s8_u8(a: uint8x8_t) -> int8x8_t; - fn vreinterpret_s8_p8(a: poly8x8_t) -> int8x8_t; - fn vreinterpret_s16_p16(a: poly16x4_t) -> int16x4_t; - fn vreinterpret_s16_u16(a: uint16x4_t) -> int16x4_t; - fn vreinterpret_s32_u32(a: uint32x2_t) -> int32x2_t; - fn vreinterpret_s64_u64(a: uint64x1_t) -> int64x1_t; - fn vreinterpretq_s8_u8(a: uint8x16_t) -> int8x16_t; - fn vreinterpretq_s8_p8(a: poly8x16_t) -> 
int8x16_t; - fn vreinterpretq_s16_p16(a: poly16x8_t) -> int16x8_t; - fn vreinterpretq_s16_u16(a: uint16x8_t) -> int16x8_t; - fn vreinterpretq_s32_u32(a: uint32x4_t) -> int32x4_t; - fn vreinterpretq_s64_u64(a: uint64x2_t) -> int64x2_t; - fn vreinterpret_u8_p8(a: poly8x8_t) -> uint8x8_t; - fn vreinterpret_u8_s8(a: int8x8_t) -> uint8x8_t; - fn vreinterpret_u16_p16(a: poly16x4_t) -> uint16x4_t; - fn vreinterpret_u16_s16(a: int16x4_t) -> uint16x4_t; - fn vreinterpret_u32_s32(a: int32x2_t) -> uint32x2_t; - fn vreinterpret_u64_s64(a: int64x1_t) -> uint64x1_t; - fn vreinterpretq_u8_p8(a: poly8x16_t) -> uint8x16_t; - fn vreinterpretq_u8_s8(a: int8x16_t) -> uint8x16_t; - fn vreinterpretq_u16_p16(a: poly16x8_t) -> uint16x8_t; - fn vreinterpretq_u16_s16(a: int16x8_t) -> uint16x8_t; - fn vreinterpretq_u32_s32(a: int32x4_t) -> uint32x4_t; - fn vreinterpretq_u64_s64(a: int64x2_t) -> uint64x2_t; - fn vreinterpret_p8_s8(a: int8x8_t) -> poly8x8_t; - fn vreinterpret_p8_u8(a: uint8x8_t) -> poly8x8_t; - fn vreinterpret_p16_s16(a: int16x4_t) -> poly16x4_t; - fn vreinterpret_p16_u16(a: uint16x4_t) -> poly16x4_t; - fn vreinterpretq_p8_s8(a: int8x16_t) -> poly8x16_t; - fn vreinterpretq_p8_u8(a: uint8x16_t) -> poly8x16_t; - fn vreinterpretq_p16_s16(a: int16x8_t) -> poly16x8_t; - fn vreinterpretq_p16_u16(a: uint16x8_t) -> poly16x8_t; - fn vreinterpret_s8_s16(a: int16x4_t) -> int8x8_t; - fn vreinterpret_s8_u16(a: uint16x4_t) -> int8x8_t; - fn vreinterpret_s8_p16(a: poly16x4_t) -> int8x8_t; - fn vreinterpret_s16_s32(a: int32x2_t) -> int16x4_t; - fn vreinterpret_s16_u32(a: uint32x2_t) -> int16x4_t; - fn vreinterpret_s32_s64(a: int64x1_t) -> int32x2_t; - fn vreinterpret_s32_u64(a: uint64x1_t) -> int32x2_t; - fn vreinterpretq_s8_s16(a: int16x8_t) -> int8x16_t; - fn vreinterpretq_s8_u16(a: uint16x8_t) -> int8x16_t; - fn vreinterpretq_s8_p16(a: poly16x8_t) -> int8x16_t; - fn vreinterpretq_s16_s32(a: int32x4_t) -> int16x8_t; - fn vreinterpretq_s16_u32(a: uint32x4_t) -> int16x8_t; - fn 
vreinterpretq_s32_s64(a: int64x2_t) -> int32x4_t; - fn vreinterpretq_s32_u64(a: uint64x2_t) -> int32x4_t; - fn vreinterpret_u8_p16(a: poly16x4_t) -> uint8x8_t; - fn vreinterpret_u8_s16(a: int16x4_t) -> uint8x8_t; - fn vreinterpret_u8_u16(a: uint16x4_t) -> uint8x8_t; - fn vreinterpret_u16_s32(a: int32x2_t) -> uint16x4_t; - fn vreinterpret_u16_u32(a: uint32x2_t) -> uint16x4_t; - fn vreinterpret_u32_s64(a: int64x1_t) -> uint32x2_t; - fn vreinterpret_u32_u64(a: uint64x1_t) -> uint32x2_t; - fn vreinterpretq_u8_p16(a: poly16x8_t) -> uint8x16_t; - fn vreinterpretq_u8_s16(a: int16x8_t) -> uint8x16_t; - fn vreinterpretq_u8_u16(a: uint16x8_t) -> uint8x16_t; - fn vreinterpretq_u16_s32(a: int32x4_t) -> uint16x8_t; - fn vreinterpretq_u16_u32(a: uint32x4_t) -> uint16x8_t; - fn vreinterpretq_u32_s64(a: int64x2_t) -> uint32x4_t; - fn vreinterpretq_u32_u64(a: uint64x2_t) -> uint32x4_t; - fn vreinterpret_p8_p16(a: poly16x4_t) -> poly8x8_t; - fn vreinterpret_p8_s16(a: int16x4_t) -> poly8x8_t; - fn vreinterpret_p8_u16(a: uint16x4_t) -> poly8x8_t; - fn vreinterpret_p16_s32(a: int32x2_t) -> poly16x4_t; - fn vreinterpret_p16_u32(a: uint32x2_t) -> poly16x4_t; - fn vreinterpretq_p8_p16(a: poly16x8_t) -> poly8x16_t; - fn vreinterpretq_p8_s16(a: int16x8_t) -> poly8x16_t; - fn vreinterpretq_p8_u16(a: uint16x8_t) -> poly8x16_t; - fn vreinterpretq_p16_s32(a: int32x4_t) -> poly16x8_t; - fn vreinterpretq_p16_u32(a: uint32x4_t) -> poly16x8_t; - fn vreinterpret_u32_p64(a: poly64x1_t) -> uint32x2_t; - fn vreinterpret_s16_p8(a: poly8x8_t) -> int16x4_t; - fn vreinterpret_s16_s8(a: int8x8_t) -> int16x4_t; - fn vreinterpret_s16_u8(a: uint8x8_t) -> int16x4_t; - fn vreinterpret_s32_p16(a: poly16x4_t) -> int32x2_t; - fn vreinterpret_s32_s16(a: int16x4_t) -> int32x2_t; - fn vreinterpret_s32_u16(a: uint16x4_t) -> int32x2_t; - fn vreinterpret_s64_s32(a: int32x2_t) -> int64x1_t; - fn vreinterpret_s64_u32(a: uint32x2_t) -> int64x1_t; - fn vreinterpretq_s16_p8(a: poly8x16_t) -> int16x8_t; - fn 
vreinterpretq_s16_s8(a: int8x16_t) -> int16x8_t; - fn vreinterpretq_s16_u8(a: uint8x16_t) -> int16x8_t; - fn vreinterpretq_s32_p16(a: poly16x8_t) -> int32x4_t; - fn vreinterpretq_s32_s16(a: int16x8_t) -> int32x4_t; - fn vreinterpretq_s32_u16(a: uint16x8_t) -> int32x4_t; - fn vreinterpretq_s64_s32(a: int32x4_t) -> int64x2_t; - fn vreinterpretq_s64_u32(a: uint32x4_t) -> int64x2_t; - fn vreinterpret_u16_p8(a: poly8x8_t) -> uint16x4_t; - fn vreinterpret_u16_s8(a: int8x8_t) -> uint16x4_t; - fn vreinterpret_u16_u8(a: uint8x8_t) -> uint16x4_t; - fn vreinterpret_u32_p16(a: poly16x4_t) -> uint32x2_t; - fn vreinterpret_u32_s16(a: int16x4_t) -> uint32x2_t; - fn vreinterpret_u32_u16(a: uint16x4_t) -> uint32x2_t; - fn vreinterpret_u64_s32(a: int32x2_t) -> uint64x1_t; - fn vreinterpret_u64_u32(a: uint32x2_t) -> uint64x1_t; - fn vreinterpretq_u16_p8(a: poly8x16_t) -> uint16x8_t; - fn vreinterpretq_u16_s8(a: int8x16_t) -> uint16x8_t; - fn vreinterpretq_u16_u8(a: uint8x16_t) -> uint16x8_t; - fn vreinterpretq_u32_p16(a: poly16x8_t) -> uint32x4_t; - fn vreinterpretq_u32_s16(a: int16x8_t) -> uint32x4_t; - fn vreinterpretq_u32_u16(a: uint16x8_t) -> uint32x4_t; - fn vreinterpretq_u64_s32(a: int32x4_t) -> uint64x2_t; - fn vreinterpretq_u64_u32(a: uint32x4_t) -> uint64x2_t; - fn vreinterpret_p16_p8(a: poly8x8_t) -> poly16x4_t; - fn vreinterpret_p16_s8(a: int8x8_t) -> poly16x4_t; - fn vreinterpret_p16_u8(a: uint8x8_t) -> poly16x4_t; - fn vreinterpretq_p16_p8(a: poly8x16_t) -> poly16x8_t; - fn vreinterpretq_p16_s8(a: int8x16_t) -> poly16x8_t; - fn vreinterpretq_p16_u8(a: uint8x16_t) -> poly16x8_t; - fn vreinterpret_s8_s32(a: int32x2_t) -> int8x8_t; - fn vreinterpret_s8_u32(a: uint32x2_t) -> int8x8_t; - fn vreinterpret_s16_s64(a: int64x1_t) -> int16x4_t; - fn vreinterpret_s16_u64(a: uint64x1_t) -> int16x4_t; - fn vreinterpretq_s8_s32(a: int32x4_t) -> int8x16_t; - fn vreinterpretq_s8_u32(a: uint32x4_t) -> int8x16_t; - fn vreinterpretq_s16_s64(a: int64x2_t) -> int16x8_t; - fn 
vreinterpretq_s16_u64(a: uint64x2_t) -> int16x8_t; - fn vreinterpret_u8_s32(a: int32x2_t) -> uint8x8_t; - fn vreinterpret_u8_u32(a: uint32x2_t) -> uint8x8_t; - fn vreinterpret_u16_s64(a: int64x1_t) -> uint16x4_t; - fn vreinterpret_u16_u64(a: uint64x1_t) -> uint16x4_t; - fn vreinterpretq_u8_s32(a: int32x4_t) -> uint8x16_t; - fn vreinterpretq_u8_u32(a: uint32x4_t) -> uint8x16_t; - fn vreinterpretq_u16_s64(a: int64x2_t) -> uint16x8_t; - fn vreinterpretq_u16_u64(a: uint64x2_t) -> uint16x8_t; - fn vreinterpret_p8_s32(a: int32x2_t) -> poly8x8_t; - fn vreinterpret_p8_u32(a: uint32x2_t) -> poly8x8_t; - fn vreinterpret_p16_s64(a: int64x1_t) -> poly16x4_t; - fn vreinterpret_p16_u64(a: uint64x1_t) -> poly16x4_t; - fn vreinterpretq_p8_s32(a: int32x4_t) -> poly8x16_t; - fn vreinterpretq_p8_u32(a: uint32x4_t) -> poly8x16_t; - fn vreinterpretq_p16_s64(a: int64x2_t) -> poly16x8_t; - fn vreinterpretq_p16_u64(a: uint64x2_t) -> poly16x8_t; - fn vreinterpret_s32_p8(a: poly8x8_t) -> int32x2_t; - fn vreinterpret_s32_s8(a: int8x8_t) -> int32x2_t; - fn vreinterpret_s32_u8(a: uint8x8_t) -> int32x2_t; - fn vreinterpret_s64_p16(a: poly16x4_t) -> int64x1_t; - fn vreinterpret_s64_s16(a: int16x4_t) -> int64x1_t; - fn vreinterpret_s64_u16(a: uint16x4_t) -> int64x1_t; - fn vreinterpretq_s32_p8(a: poly8x16_t) -> int32x4_t; - fn vreinterpretq_s32_s8(a: int8x16_t) -> int32x4_t; - fn vreinterpretq_s32_u8(a: uint8x16_t) -> int32x4_t; - fn vreinterpretq_s64_p16(a: poly16x8_t) -> int64x2_t; - fn vreinterpretq_s64_s16(a: int16x8_t) -> int64x2_t; - fn vreinterpretq_s64_u16(a: uint16x8_t) -> int64x2_t; - fn vreinterpret_u32_p8(a: poly8x8_t) -> uint32x2_t; - fn vreinterpret_u32_s8(a: int8x8_t) -> uint32x2_t; - fn vreinterpret_u32_u8(a: uint8x8_t) -> uint32x2_t; - fn vreinterpret_u64_p16(a: poly16x4_t) -> uint64x1_t; - fn vreinterpret_u64_s16(a: int16x4_t) -> uint64x1_t; - fn vreinterpret_u64_u16(a: uint16x4_t) -> uint64x1_t; - fn vreinterpretq_u32_p8(a: poly8x16_t) -> uint32x4_t; - fn 
vreinterpretq_u32_s8(a: int8x16_t) -> uint32x4_t; - fn vreinterpretq_u32_u8(a: uint8x16_t) -> uint32x4_t; - fn vreinterpretq_u64_p16(a: poly16x8_t) -> uint64x2_t; - fn vreinterpretq_u64_s16(a: int16x8_t) -> uint64x2_t; - fn vreinterpretq_u64_u16(a: uint16x8_t) -> uint64x2_t; - fn vreinterpret_s8_s64(a: int64x1_t) -> int8x8_t; - fn vreinterpret_s8_u64(a: uint64x1_t) -> int8x8_t; - fn vreinterpret_u8_s64(a: int64x1_t) -> uint8x8_t; - fn vreinterpret_u8_u64(a: uint64x1_t) -> uint8x8_t; - fn vreinterpret_p8_s64(a: int64x1_t) -> poly8x8_t; - fn vreinterpret_p8_u64(a: uint64x1_t) -> poly8x8_t; - fn vreinterpretq_s8_s64(a: int64x2_t) -> int8x16_t; - fn vreinterpretq_s8_u64(a: uint64x2_t) -> int8x16_t; - fn vreinterpretq_u8_s64(a: int64x2_t) -> uint8x16_t; - fn vreinterpretq_u8_u64(a: uint64x2_t) -> uint8x16_t; - fn vreinterpretq_p8_s64(a: int64x2_t) -> poly8x16_t; - fn vreinterpretq_p8_u64(a: uint64x2_t) -> poly8x16_t; - fn vreinterpret_s64_p8(a: poly8x8_t) -> int64x1_t; - fn vreinterpret_s64_s8(a: int8x8_t) -> int64x1_t; - fn vreinterpret_s64_u8(a: uint8x8_t) -> int64x1_t; - fn vreinterpret_u64_p8(a: poly8x8_t) -> uint64x1_t; - fn vreinterpret_u64_s8(a: int8x8_t) -> uint64x1_t; - fn vreinterpret_u64_u8(a: uint8x8_t) -> uint64x1_t; - fn vreinterpretq_s64_p8(a: poly8x16_t) -> int64x2_t; - fn vreinterpretq_s64_s8(a: int8x16_t) -> int64x2_t; - fn vreinterpretq_s64_u8(a: uint8x16_t) -> int64x2_t; - fn vreinterpretq_u64_p8(a: poly8x16_t) -> uint64x2_t; - fn vreinterpretq_u64_s8(a: int8x16_t) -> uint64x2_t; - fn vreinterpretq_u64_u8(a: uint8x16_t) -> uint64x2_t; - fn vreinterpret_s8_f32(a: float32x2_t) -> int8x8_t; - fn vreinterpret_s16_f32(a: float32x2_t) -> int16x4_t; - fn vreinterpret_s32_f32(a: float32x2_t) -> int32x2_t; - fn vreinterpret_s64_f32(a: float32x2_t) -> int64x1_t; - fn vreinterpretq_s8_f32(a: float32x4_t) -> int8x16_t; - fn vreinterpretq_s16_f32(a: float32x4_t) -> int16x8_t; - fn vreinterpretq_s32_f32(a: float32x4_t) -> int32x4_t; - fn vreinterpretq_s64_f32(a: 
float32x4_t) -> int64x2_t; - fn vreinterpret_u8_f32(a: float32x2_t) -> uint8x8_t; - fn vreinterpret_u16_f32(a: float32x2_t) -> uint16x4_t; - fn vreinterpret_u32_f32(a: float32x2_t) -> uint32x2_t; - fn vreinterpret_u64_f32(a: float32x2_t) -> uint64x1_t; - fn vreinterpretq_u8_f32(a: float32x4_t) -> uint8x16_t; - fn vreinterpretq_u16_f32(a: float32x4_t) -> uint16x8_t; - fn vreinterpretq_u32_f32(a: float32x4_t) -> uint32x4_t; - fn vreinterpretq_u64_f32(a: float32x4_t) -> uint64x2_t; - fn vreinterpret_p8_f32(a: float32x2_t) -> poly8x8_t; - fn vreinterpret_p16_f32(a: float32x2_t) -> poly16x4_t; - fn vreinterpretq_p8_f32(a: float32x4_t) -> poly8x16_t; - fn vreinterpretq_p16_f32(a: float32x4_t) -> poly16x8_t; - fn vreinterpretq_p128_f32(a: float32x4_t) -> p128; - fn vreinterpret_f32_s8(a: int8x8_t) -> float32x2_t; - fn vreinterpret_f32_s16(a: int16x4_t) -> float32x2_t; - fn vreinterpret_f32_s32(a: int32x2_t) -> float32x2_t; - fn vreinterpret_f32_s64(a: int64x1_t) -> float32x2_t; - fn vreinterpretq_f32_s8(a: int8x16_t) -> float32x4_t; - fn vreinterpretq_f32_s16(a: int16x8_t) -> float32x4_t; - fn vreinterpretq_f32_s32(a: int32x4_t) -> float32x4_t; - fn vreinterpretq_f32_s64(a: int64x2_t) -> float32x4_t; - fn vreinterpret_f32_u8(a: uint8x8_t) -> float32x2_t; - fn vreinterpret_f32_u16(a: uint16x4_t) -> float32x2_t; - fn vreinterpret_f32_u32(a: uint32x2_t) -> float32x2_t; - fn vreinterpret_f32_u64(a: uint64x1_t) -> float32x2_t; - fn vreinterpretq_f32_u8(a: uint8x16_t) -> float32x4_t; - fn vreinterpretq_f32_u16(a: uint16x8_t) -> float32x4_t; - fn vreinterpretq_f32_u32(a: uint32x4_t) -> float32x4_t; - fn vreinterpretq_f32_u64(a: uint64x2_t) -> float32x4_t; - fn vreinterpret_f32_p8(a: poly8x8_t) -> float32x2_t; - fn vreinterpret_f32_p16(a: poly16x4_t) -> float32x2_t; - fn vreinterpretq_f32_p8(a: poly8x16_t) -> float32x4_t; - fn vreinterpretq_f32_p16(a: poly16x8_t) -> float32x4_t; - fn vreinterpretq_f32_p128(a: p128) -> float32x4_t; - fn vrshl_s8(a: int8x8_t, b: int8x8_t) -> 
int8x8_t; - fn vrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; - fn vrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - fn vrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; - fn vrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; - fn vrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; - fn vrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t; - fn vrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t; - fn vrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t; - fn vrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t; - fn vrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t; - fn vrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t; - fn vrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t; - fn vrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t; - fn vrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t; - fn vrshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t; - fn vrshr_n_s8(a: int8x8_t) -> int8x8_t; - fn vrshrq_n_s8(a: int8x16_t) -> int8x16_t; - fn vrshr_n_s16(a: int16x4_t) -> int16x4_t; - fn vrshrq_n_s16(a: int16x8_t) -> int16x8_t; - fn vrshr_n_s32(a: int32x2_t) -> int32x2_t; - fn vrshrq_n_s32(a: int32x4_t) -> int32x4_t; - fn vrshr_n_s64(a: int64x1_t) -> int64x1_t; - fn vrshrq_n_s64(a: int64x2_t) -> int64x2_t; - fn vrshr_n_u8(a: uint8x8_t) -> uint8x8_t; - fn vrshrq_n_u8(a: uint8x16_t) -> uint8x16_t; - fn vrshr_n_u16(a: uint16x4_t) -> uint16x4_t; - fn vrshrq_n_u16(a: uint16x8_t) -> uint16x8_t; - fn vrshr_n_u32(a: uint32x2_t) -> uint32x2_t; - fn vrshrq_n_u32(a: uint32x4_t) -> uint32x4_t; - fn vrshr_n_u64(a: uint64x1_t) -> uint64x1_t; - fn vrshrq_n_u64(a: uint64x2_t) -> uint64x2_t; - fn vrshrn_n_s16(a: int16x8_t) -> int8x8_t; - fn vrshrn_n_s32(a: int32x4_t) -> int16x4_t; - fn vrshrn_n_s64(a: int64x2_t) -> int32x2_t; - fn vrshrn_n_u16(a: uint16x8_t) -> uint8x8_t; - fn vrshrn_n_u32(a: uint32x4_t) -> uint16x4_t; - fn vrshrn_n_u64(a: uint64x2_t) -> uint32x2_t; - fn vrsra_n_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; - fn vrsraq_n_s8(a: int8x16_t, b: int8x16_t) 
-> int8x16_t; - fn vrsra_n_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - fn vrsraq_n_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; - fn vrsra_n_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; - fn vrsraq_n_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; - fn vrsra_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t; - fn vrsraq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t; - fn vrsra_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; - fn vrsraq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; - fn vrsra_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; - fn vrsraq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; - fn vrsra_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; - fn vrsraq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; - fn vrsra_n_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t; - fn vrsraq_n_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t; - fn vrsubhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t; - fn vrsubhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t; - fn vrsubhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t; - fn vrsubhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t; - fn vrsubhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t; - fn vrsubhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t; - fn vset_lane_s8(a: i8, b: int8x8_t) -> int8x8_t; - fn vset_lane_s16(a: i16, b: int16x4_t) -> int16x4_t; - fn vset_lane_s32(a: i32, b: int32x2_t) -> int32x2_t; - fn vset_lane_s64(a: i64, b: int64x1_t) -> int64x1_t; - fn vset_lane_u8(a: u8, b: uint8x8_t) -> uint8x8_t; - fn vset_lane_u16(a: u16, b: uint16x4_t) -> uint16x4_t; - fn vset_lane_u32(a: u32, b: uint32x2_t) -> uint32x2_t; - fn vset_lane_u64(a: u64, b: uint64x1_t) -> uint64x1_t; - fn vset_lane_p8(a: p8, b: poly8x8_t) -> poly8x8_t; - fn vset_lane_p16(a: p16, b: poly16x4_t) -> poly16x4_t; - fn vsetq_lane_s8(a: i8, b: int8x16_t) -> int8x16_t; - fn vsetq_lane_s16(a: i16, b: int16x8_t) -> int16x8_t; - fn vsetq_lane_s32(a: i32, b: int32x4_t) -> int32x4_t; - fn vsetq_lane_s64(a: i64, b: int64x2_t) -> int64x2_t; 
- fn vsetq_lane_u8(a: u8, b: uint8x16_t) -> uint8x16_t; - fn vsetq_lane_u16(a: u16, b: uint16x8_t) -> uint16x8_t; - fn vsetq_lane_u32(a: u32, b: uint32x4_t) -> uint32x4_t; - fn vsetq_lane_u64(a: u64, b: uint64x2_t) -> uint64x2_t; - fn vsetq_lane_p8(a: p8, b: poly8x16_t) -> poly8x16_t; - fn vsetq_lane_p16(a: p16, b: poly16x8_t) -> poly16x8_t; - fn vset_lane_f32(a: f32, b: float32x2_t) -> float32x2_t; - fn vsetq_lane_f32(a: f32, b: float32x4_t) -> float32x4_t; - fn vshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; - fn vshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; - fn vshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - fn vshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; - fn vshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; - fn vshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; - fn vshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t; - fn vshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t; - fn vshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t; - fn vshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t; - fn vshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t; - fn vshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t; - fn vshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t; - fn vshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t; - fn vshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t; - fn vshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t; - fn vshl_n_s8(a: int8x8_t) -> int8x8_t; - fn vshlq_n_s8(a: int8x16_t) -> int8x16_t; - fn vshl_n_s16(a: int16x4_t) -> int16x4_t; - fn vshlq_n_s16(a: int16x8_t) -> int16x8_t; - fn vshl_n_s32(a: int32x2_t) -> int32x2_t; - fn vshlq_n_s32(a: int32x4_t) -> int32x4_t; - fn vshl_n_u8(a: uint8x8_t) -> uint8x8_t; - fn vshlq_n_u8(a: uint8x16_t) -> uint8x16_t; - fn vshl_n_u16(a: uint16x4_t) -> uint16x4_t; - fn vshlq_n_u16(a: uint16x8_t) -> uint16x8_t; - fn vshl_n_u32(a: uint32x2_t) -> uint32x2_t; - fn vshlq_n_u32(a: uint32x4_t) -> uint32x4_t; - fn vshl_n_s64(a: int64x1_t) -> int64x1_t; - fn vshlq_n_s64(a: int64x2_t) -> int64x2_t; 
- fn vshl_n_u64(a: uint64x1_t) -> uint64x1_t; - fn vshlq_n_u64(a: uint64x2_t) -> uint64x2_t; - fn vshll_n_s8(a: int8x8_t) -> int16x8_t; - fn vshll_n_s16(a: int16x4_t) -> int32x4_t; - fn vshll_n_s32(a: int32x2_t) -> int64x2_t; - fn vshll_n_u8(a: uint8x8_t) -> uint16x8_t; - fn vshll_n_u16(a: uint16x4_t) -> uint32x4_t; - fn vshll_n_u32(a: uint32x2_t) -> uint64x2_t; - fn vshr_n_s8(a: int8x8_t) -> int8x8_t; - fn vshrq_n_s8(a: int8x16_t) -> int8x16_t; - fn vshr_n_s16(a: int16x4_t) -> int16x4_t; - fn vshrq_n_s16(a: int16x8_t) -> int16x8_t; - fn vshr_n_s32(a: int32x2_t) -> int32x2_t; - fn vshrq_n_s32(a: int32x4_t) -> int32x4_t; - fn vshr_n_s64(a: int64x1_t) -> int64x1_t; - fn vshrq_n_s64(a: int64x2_t) -> int64x2_t; - fn vshr_n_u8(a: uint8x8_t) -> uint8x8_t; - fn vshrq_n_u8(a: uint8x16_t) -> uint8x16_t; - fn vshr_n_u16(a: uint16x4_t) -> uint16x4_t; - fn vshrq_n_u16(a: uint16x8_t) -> uint16x8_t; - fn vshr_n_u32(a: uint32x2_t) -> uint32x2_t; - fn vshrq_n_u32(a: uint32x4_t) -> uint32x4_t; - fn vshr_n_u64(a: uint64x1_t) -> uint64x1_t; - fn vshrq_n_u64(a: uint64x2_t) -> uint64x2_t; - fn vshrn_n_s16(a: int16x8_t) -> int8x8_t; - fn vshrn_n_s32(a: int32x4_t) -> int16x4_t; - fn vshrn_n_s64(a: int64x2_t) -> int32x2_t; - fn vshrn_n_u16(a: uint16x8_t) -> uint8x8_t; - fn vshrn_n_u32(a: uint32x4_t) -> uint16x4_t; - fn vshrn_n_u64(a: uint64x2_t) -> uint32x2_t; - fn vsra_n_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; - fn vsraq_n_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; - fn vsra_n_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - fn vsraq_n_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; - fn vsra_n_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; - fn vsraq_n_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; - fn vsra_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t; - fn vsraq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t; - fn vsra_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; - fn vsraq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; - fn vsra_n_u16(a: uint16x4_t, b: uint16x4_t) -> 
uint16x4_t; - fn vsraq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; - fn vsra_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; - fn vsraq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; - fn vsra_n_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t; - fn vsraq_n_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t; - fn vtrn_s8(a: int8x8_t, b: int8x8_t) -> int8x8x2_t; - fn vtrn_s16(a: int16x4_t, b: int16x4_t) -> int16x4x2_t; - fn vtrnq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t; - fn vtrnq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t; - fn vtrnq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t; - fn vtrn_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t; - fn vtrn_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t; - fn vtrnq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t; - fn vtrnq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t; - fn vtrnq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t; - fn vtrn_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t; - fn vtrn_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t; - fn vtrnq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16x2_t; - fn vtrnq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8x2_t; - fn vtrn_s32(a: int32x2_t, b: int32x2_t) -> int32x2x2_t; - fn vtrn_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2x2_t; - fn vtrn_f32(a: float32x2_t, b: float32x2_t) -> float32x2x2_t; - fn vtrnq_f32(a: float32x4_t, b: float32x4_t) -> float32x4x2_t; - fn vzip_s8(a: int8x8_t, b: int8x8_t) -> int8x8x2_t; - fn vzip_s16(a: int16x4_t, b: int16x4_t) -> int16x4x2_t; - fn vzip_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t; - fn vzip_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t; - fn vzip_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t; - fn vzip_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t; - fn vzip_s32(a: int32x2_t, b: int32x2_t) -> int32x2x2_t; - fn vzip_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2x2_t; - fn vzipq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t; - fn vzipq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t; - fn 
vzipq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t; - fn vzipq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t; - fn vzipq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t; - fn vzipq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t; - fn vzipq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16x2_t; - fn vzipq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8x2_t; - fn vzip_f32(a: float32x2_t, b: float32x2_t) -> float32x2x2_t; - fn vzipq_f32(a: float32x4_t, b: float32x4_t) -> float32x4x2_t; - fn vuzp_s8(a: int8x8_t, b: int8x8_t) -> int8x8x2_t; - fn vuzp_s16(a: int16x4_t, b: int16x4_t) -> int16x4x2_t; - fn vuzpq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t; - fn vuzpq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t; - fn vuzpq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t; - fn vuzp_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t; - fn vuzp_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t; - fn vuzpq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t; - fn vuzpq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t; - fn vuzpq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t; - fn vuzp_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t; - fn vuzp_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t; - fn vuzpq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16x2_t; - fn vuzpq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8x2_t; - fn vuzp_s32(a: int32x2_t, b: int32x2_t) -> int32x2x2_t; - fn vuzp_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2x2_t; - fn vuzp_f32(a: float32x2_t, b: float32x2_t) -> float32x2x2_t; - fn vuzpq_f32(a: float32x4_t, b: float32x4_t) -> float32x4x2_t; - fn vabal_u8(a: uint16x8_t, b: uint8x8_t, c: uint8x8_t) -> uint16x8_t; - fn vabal_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t; - fn vabal_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t; - fn vabal_s8(a: int16x8_t, b: int8x8_t, c: int8x8_t) -> int16x8_t; - fn vabal_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t; - fn vabal_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> 
int64x2_t; - fn vqabs_s8(a: int8x8_t) -> int8x8_t; - fn vqabsq_s8(a: int8x16_t) -> int8x16_t; - fn vqabs_s16(a: int16x4_t) -> int16x4_t; - fn vqabsq_s16(a: int16x8_t) -> int16x8_t; - fn vqabs_s32(a: int32x2_t) -> int32x2_t; - fn vqabsq_s32(a: int32x4_t) -> int32x4_t; - - unsafe fn vld1_lane_s8(ptr: *const i8, src: int8x8_t) -> int8x8_t; - unsafe fn vld1q_lane_s8(ptr: *const i8, src: int8x16_t) -> int8x16_t; - unsafe fn vld1_lane_s16(ptr: *const i16, src: int16x4_t) -> int16x4_t; - unsafe fn vld1q_lane_s16(ptr: *const i16, src: int16x8_t) -> int16x8_t; - unsafe fn vld1_lane_s32(ptr: *const i32, src: int32x2_t) -> int32x2_t; - unsafe fn vld1q_lane_s32(ptr: *const i32, src: int32x4_t) -> int32x4_t; - unsafe fn vld1_lane_s64(ptr: *const i64, src: int64x1_t) -> int64x1_t; - unsafe fn vld1q_lane_s64(ptr: *const i64, src: int64x2_t) -> int64x2_t; - unsafe fn vld1_lane_u8(ptr: *const u8, src: uint8x8_t) -> uint8x8_t; - unsafe fn vld1q_lane_u8(ptr: *const u8, src: uint8x16_t) -> uint8x16_t; - unsafe fn vld1_lane_u16(ptr: *const u16, src: uint16x4_t) -> uint16x4_t; - unsafe fn vld1q_lane_u16(ptr: *const u16, src: uint16x8_t) -> uint16x8_t; - unsafe fn vld1_lane_u32(ptr: *const u32, src: uint32x2_t) -> uint32x2_t; - unsafe fn vld1q_lane_u32(ptr: *const u32, src: uint32x4_t) -> uint32x4_t; - unsafe fn vld1_lane_u64(ptr: *const u64, src: uint64x1_t) -> uint64x1_t; - unsafe fn vld1q_lane_u64(ptr: *const u64, src: uint64x2_t) -> uint64x2_t; - unsafe fn vld1_lane_p8(ptr: *const p8, src: poly8x8_t) -> poly8x8_t; - unsafe fn vld1q_lane_p8(ptr: *const p8, src: poly8x16_t) -> poly8x16_t; - unsafe fn vld1_lane_p16(ptr: *const p16, src: poly16x4_t) -> poly16x4_t; - unsafe fn vld1q_lane_p16(ptr: *const p16, src: poly16x8_t) -> poly16x8_t; - unsafe fn vld1_lane_f32(ptr: *const f32, src: float32x2_t) -> float32x2_t; - unsafe fn vld1q_lane_f32(ptr: *const f32, src: float32x4_t) - -> float32x4_t; - unsafe fn vld1_dup_s8(ptr: *const i8) -> int8x8_t; - unsafe fn vld1q_dup_s8(ptr: *const i8) -> 
int8x16_t; - unsafe fn vld1_dup_s16(ptr: *const i16) -> int16x4_t; - unsafe fn vld1q_dup_s16(ptr: *const i16) -> int16x8_t; - unsafe fn vld1_dup_s32(ptr: *const i32) -> int32x2_t; - unsafe fn vld1q_dup_s32(ptr: *const i32) -> int32x4_t; - unsafe fn vld1_dup_s64(ptr: *const i64) -> int64x1_t; - unsafe fn vld1q_dup_s64(ptr: *const i64) -> int64x2_t; - unsafe fn vld1_dup_u8(ptr: *const u8) -> uint8x8_t; - unsafe fn vld1q_dup_u8(ptr: *const u8) -> uint8x16_t; - unsafe fn vld1_dup_u16(ptr: *const u16) -> uint16x4_t; - unsafe fn vld1q_dup_u16(ptr: *const u16) -> uint16x8_t; - unsafe fn vld1_dup_u32(ptr: *const u32) -> uint32x2_t; - unsafe fn vld1q_dup_u32(ptr: *const u32) -> uint32x4_t; - unsafe fn vld1_dup_u64(ptr: *const u64) -> uint64x1_t; - unsafe fn vld1q_dup_u64(ptr: *const u64) -> uint64x2_t; - unsafe fn vld1_dup_p8(ptr: *const p8) -> poly8x8_t; - unsafe fn vld1q_dup_p8(ptr: *const p8) -> poly8x16_t; - unsafe fn vld1_dup_p16(ptr: *const p16) -> poly16x4_t; - unsafe fn vld1q_dup_p16(ptr: *const p16) -> poly16x8_t; - unsafe fn vld1_dup_f32(ptr: *const f32) -> float32x2_t; - unsafe fn vld1q_dup_f32(ptr: *const f32) -> float32x4_t; - fn vaba_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t; - fn vaba_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t; - fn vaba_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t; - fn vaba_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t; - fn vaba_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t; - fn vaba_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t; - fn vabaq_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t; - fn vabaq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t; - fn vabaq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t; - fn vabaq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t; - fn vabaq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t; - fn vabaq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> 
uint32x4_t; - fn vabs_s8(a: int8x8_t) -> int8x8_t; - fn vabs_s16(a: int16x4_t) -> int16x4_t; - fn vabs_s32(a: int32x2_t) -> int32x2_t; - fn vabsq_s8(a: int8x16_t) -> int8x16_t; - fn vabsq_s16(a: int16x8_t) -> int16x8_t; - fn vabsq_s32(a: int32x4_t) -> int32x4_t; - fn vpadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - fn vpadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; - fn vpadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; - fn vpadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; - fn vpadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; - fn vpadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; - fn vadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; - fn vaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; - fn vadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - fn vaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; - fn vadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; - fn vaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; - fn vaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t; - fn vadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; - fn vaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; - fn vadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; - fn vaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; - fn vadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; - fn vaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; - fn vaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t; - fn vadd_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; - fn vaddq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; - fn vaddl_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t; - fn vaddl_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t; - fn vaddl_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t; - fn vaddl_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t; - fn vaddl_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t; - fn vaddl_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t; - fn vaddl_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t; - fn vaddl_high_s16(a: int16x8_t, b: int16x8_t) -> 
int32x4_t; - fn vaddl_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t; - fn vaddl_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t; - fn vaddl_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t; - fn vaddl_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t; - fn vaddw_s8(a: int16x8_t, b: int8x8_t) -> int16x8_t; - fn vaddw_s16(a: int32x4_t, b: int16x4_t) -> int32x4_t; - fn vaddw_s32(a: int64x2_t, b: int32x2_t) -> int64x2_t; - fn vaddw_u8(a: uint16x8_t, b: uint8x8_t) -> uint16x8_t; - fn vaddw_u16(a: uint32x4_t, b: uint16x4_t) -> uint32x4_t; - fn vaddw_u32(a: uint64x2_t, b: uint32x2_t) -> uint64x2_t; - fn vaddw_high_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t; - fn vaddw_high_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t; - fn vaddw_high_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t; - fn vaddw_high_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t; - fn vaddw_high_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t; - fn vaddw_high_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t; - fn vaddhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t; - fn vaddhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t; - fn vaddhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t; - fn vaddhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t; - fn vaddhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t; - fn vaddhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t; - fn vaddhn_high_s16(r: int8x8_t, a: int16x8_t, b: int16x8_t) -> int8x16_t; - fn vaddhn_high_s32(r: int16x4_t, a: int32x4_t, b: int32x4_t) -> int16x8_t; - fn vaddhn_high_s64(r: int32x2_t, a: int64x2_t, b: int64x2_t) -> int32x4_t; - fn vaddhn_high_u16(r: uint8x8_t, a: uint16x8_t, b: uint16x8_t) -> uint8x16_t; - fn vaddhn_high_u32(r: uint16x4_t, a: uint32x4_t, b: uint32x4_t) -> uint16x8_t; - fn vaddhn_high_u64(r: uint32x2_t, a: uint64x2_t, b: uint64x2_t) -> uint32x4_t; - fn vraddhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t; - fn vraddhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t; - fn vraddhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t; - 
fn vraddhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t; - fn vraddhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t; - fn vraddhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t; - fn vraddhn_high_s16(r: int8x8_t, a: int16x8_t, b: int16x8_t) -> int8x16_t; - fn vraddhn_high_s32(r: int16x4_t, a: int32x4_t, b: int32x4_t) -> int16x8_t; - fn vraddhn_high_s64(r: int32x2_t, a: int64x2_t, b: int64x2_t) -> int32x4_t; - fn vraddhn_high_u16(r: uint8x8_t, a: uint16x8_t, b: uint16x8_t) -> uint8x16_t; - fn vraddhn_high_u32(r: uint16x4_t, a: uint32x4_t, b: uint32x4_t) -> uint16x8_t; - fn vraddhn_high_u64(r: uint32x2_t, a: uint64x2_t, b: uint64x2_t) -> uint32x4_t; - fn vpaddl_s8(a: int8x8_t) -> int16x4_t; - fn vpaddl_s16(a: int16x4_t) -> int32x2_t; - fn vpaddl_s32(a: int32x2_t) -> int64x1_t; - fn vpaddlq_s8(a: int8x16_t) -> int16x8_t; - fn vpaddlq_s16(a: int16x8_t) -> int32x4_t; - fn vpaddlq_s32(a: int32x4_t) -> int64x2_t; - fn vpaddl_u8(a: uint8x8_t) -> uint16x4_t; - fn vpaddl_u16(a: uint16x4_t) -> uint32x2_t; - fn vpaddl_u32(a: uint32x2_t) -> uint64x1_t; - fn vpaddlq_u8(a: uint8x16_t) -> uint16x8_t; - fn vpaddlq_u16(a: uint16x8_t) -> uint32x4_t; - fn vpaddlq_u32(a: uint32x4_t) -> uint64x2_t; - fn vmovn_s16(a: int16x8_t) -> int8x8_t; - fn vmovn_s32(a: int32x4_t) -> int16x4_t; - fn vmovn_s64(a: int64x2_t) -> int32x2_t; - fn vmovn_u16(a: uint16x8_t) -> uint8x8_t; - fn vmovn_u32(a: uint32x4_t) -> uint16x4_t; - fn vmovn_u64(a: uint64x2_t) -> uint32x2_t; - fn vmovl_s8(a: int8x8_t) -> int16x8_t; - fn vmovl_s16(a: int16x4_t) -> int32x4_t; - fn vmovl_s32(a: int32x2_t) -> int64x2_t; - fn vmovl_u8(a: uint8x8_t) -> uint16x8_t; - fn vmovl_u16(a: uint16x4_t) -> uint32x4_t; - fn vmovl_u32(a: uint32x2_t) -> uint64x2_t; - fn vmvn_s8(a: int8x8_t) -> int8x8_t; - fn vmvnq_s8(a: int8x16_t) -> int8x16_t; - fn vmvn_s16(a: int16x4_t) -> int16x4_t; - fn vmvnq_s16(a: int16x8_t) -> int16x8_t; - fn vmvn_s32(a: int32x2_t) -> int32x2_t; - fn vmvnq_s32(a: int32x4_t) -> int32x4_t; - fn vmvn_u8(a: 
uint8x8_t) -> uint8x8_t; - fn vmvnq_u8(a: uint8x16_t) -> uint8x16_t; - fn vmvn_u16(a: uint16x4_t) -> uint16x4_t; - fn vmvnq_u16(a: uint16x8_t) -> uint16x8_t; - fn vmvn_u32(a: uint32x2_t) -> uint32x2_t; - fn vmvnq_u32(a: uint32x4_t) -> uint32x4_t; - fn vmvn_p8(a: poly8x8_t) -> poly8x8_t; - fn vmvnq_p8(a: poly8x16_t) -> poly8x16_t; - fn vbic_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; - fn vbicq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; - fn vbic_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - fn vbicq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; - fn vbic_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; - fn vbicq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; - fn vbic_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t; - fn vbicq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t; - fn vbic_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; - fn vbicq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; - fn vbic_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; - fn vbicq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; - fn vbic_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; - fn vbicq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; - fn vbic_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t; - fn vbicq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t; - fn vbsl_s8(a: uint8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t; - fn vbsl_s16(a: uint16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t; - fn vbsl_s32(a: uint32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t; - fn vbsl_s64(a: uint64x1_t, b: int64x1_t, c: int64x1_t) -> int64x1_t; - fn vbsl_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t; - fn vbsl_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t; - fn vbsl_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t; - fn vbsl_u64(a: uint64x1_t, b: uint64x1_t, c: uint64x1_t) -> uint64x1_t; - fn vbsl_f32(a: uint32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t; - fn vbsl_p8(a: uint8x8_t, b: poly8x8_t, c: poly8x8_t) -> poly8x8_t; - fn vbsl_p16(a: uint16x4_t, b: 
poly16x4_t, c: poly16x4_t) -> poly16x4_t; - fn vbslq_s8(a: uint8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t; - fn vbslq_s16(a: uint16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t; - fn vbslq_s32(a: uint32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t; - fn vbslq_s64(a: uint64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t; - fn vbslq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t; - fn vbslq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t; - fn vbslq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t; - fn vbslq_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t; - fn vbslq_p8(a: uint8x16_t, b: poly8x16_t, c: poly8x16_t) -> poly8x16_t; - fn vbslq_p16(a: uint16x8_t, b: poly16x8_t, c: poly16x8_t) -> poly16x8_t; - fn vbslq_f32(a: uint32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t; - fn vorn_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; - fn vornq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; - fn vorn_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - fn vornq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; - fn vorn_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; - fn vornq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; - fn vorn_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t; - fn vornq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t; - fn vorn_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; - fn vornq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; - fn vorn_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; - fn vornq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; - fn vorn_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; - fn vornq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; - fn vorn_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t; - fn vornq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t; - fn vpmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; - fn vpmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - fn vpmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; - fn vpmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; - 
fn vpmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; - fn vpmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; - fn vpmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; - fn vpmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; - fn vpmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - fn vpmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; - fn vpmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; - fn vpmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; - fn vpmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; - fn vpmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; - fn vgetq_lane_u64(v: uint64x2_t) -> u64; - fn vget_lane_u64(v: uint64x1_t) -> u64; - fn vget_lane_u16(v: uint16x4_t) -> u16; - fn vget_lane_s16(v: int16x4_t) -> i16; - fn vget_lane_p16(v: poly16x4_t) -> p16; - fn vget_lane_u32(v: uint32x2_t) -> u32; - fn vget_lane_s32(v: int32x2_t) -> i32; - fn vget_lane_f32(v: float32x2_t) -> f32; - fn vgetq_lane_f32(v: float32x4_t) -> f32; - fn vget_lane_p64(v: poly64x1_t) -> p64; - fn vgetq_lane_p64(v: poly64x2_t) -> p64; - fn vget_lane_s64(v: int64x1_t) -> i64; - fn vgetq_lane_s64(v: int64x2_t) -> i64; - fn vgetq_lane_u16(v: uint16x8_t) -> u16; - fn vgetq_lane_u32(v: uint32x4_t) -> u32; - fn vgetq_lane_s16(v: int16x8_t) -> i16; - fn vgetq_lane_p16(v: poly16x8_t) -> p16; - fn vgetq_lane_s32(v: int32x4_t) -> i32; - fn vget_lane_u8(v: uint8x8_t) -> u8; - fn vget_lane_s8(v: int8x8_t) -> i8; - fn vget_lane_p8(v: poly8x8_t) -> p8; - fn vgetq_lane_u8(v: uint8x16_t) -> u8; - fn vgetq_lane_s8(v: int8x16_t) -> i8; - fn vgetq_lane_p8(v: poly8x16_t) -> p8; - fn vget_high_s8(a: int8x16_t) -> int8x8_t; - fn vget_high_s16(a: int16x8_t) -> int16x4_t; - fn vget_high_s32(a: int32x4_t) -> int32x2_t; - fn vget_high_s64(a: int64x2_t) -> int64x1_t; - fn vget_high_u8(a: uint8x16_t) -> uint8x8_t; - fn vget_high_u16(a: uint16x8_t) -> uint16x4_t; - fn vget_high_u32(a: uint32x4_t) -> uint32x2_t; - fn vget_high_u64(a: uint64x2_t) -> uint64x1_t; - fn vget_high_p8(a: poly8x16_t) 
-> poly8x8_t; - fn vget_high_p16(a: poly16x8_t) -> poly16x4_t; - fn vget_high_f32(a: float32x4_t) -> float32x2_t; - fn vget_low_s8(a: int8x16_t) -> int8x8_t; - fn vget_low_s16(a: int16x8_t) -> int16x4_t; - fn vget_low_s32(a: int32x4_t) -> int32x2_t; - fn vget_low_s64(a: int64x2_t) -> int64x1_t; - fn vget_low_u8(a: uint8x16_t) -> uint8x8_t; - fn vget_low_u16(a: uint16x8_t) -> uint16x4_t; - fn vget_low_u32(a: uint32x4_t) -> uint32x2_t; - fn vget_low_u64(a: uint64x2_t) -> uint64x1_t; - fn vget_low_p8(a: poly8x16_t) -> poly8x8_t; - fn vget_low_p16(a: poly16x8_t) -> poly16x4_t; - fn vget_low_f32(a: float32x4_t) -> float32x2_t; - fn vdupq_n_s8(value: i8) -> int8x16_t; - fn vdupq_n_s16(value: i16) -> int16x8_t; - fn vdupq_n_s32(value: i32) -> int32x4_t; - fn vdupq_n_s64(value: i64) -> int64x2_t; - fn vdupq_n_u8(value: u8) -> uint8x16_t; - fn vdupq_n_u16(value: u16) -> uint16x8_t; - fn vdupq_n_u32(value: u32) -> uint32x4_t; - fn vdupq_n_u64(value: u64) -> uint64x2_t; - fn vdupq_n_p8(value: p8) -> poly8x16_t; - fn vdupq_n_p16(value: p16) -> poly16x8_t; - fn vdupq_n_f32(value: f32) -> float32x4_t; - fn vdup_n_s8(value: i8) -> int8x8_t; - fn vdup_n_s16(value: i16) -> int16x4_t; - fn vdup_n_s32(value: i32) -> int32x2_t; - fn vdup_n_s64(value: i64) -> int64x1_t; - fn vdup_n_u8(value: u8) -> uint8x8_t; - fn vdup_n_u16(value: u16) -> uint16x4_t; - fn vdup_n_u32(value: u32) -> uint32x2_t; - fn vdup_n_u64(value: u64) -> uint64x1_t; - fn vdup_n_p8(value: p8) -> poly8x8_t; - fn vdup_n_p16(value: p16) -> poly16x4_t; - fn vdup_n_f32(value: f32) -> float32x2_t; - unsafe fn vldrq_p128(a: *const p128) -> p128; - unsafe fn vstrq_p128(a: *mut p128, b: p128); - fn vmov_n_s8(value: i8) -> int8x8_t; - fn vmov_n_s16(value: i16) -> int16x4_t; - fn vmov_n_s32(value: i32) -> int32x2_t; - fn vmov_n_s64(value: i64) -> int64x1_t; - fn vmov_n_u8(value: u8) -> uint8x8_t; - fn vmov_n_u16(value: u16) -> uint16x4_t; - fn vmov_n_u32(value: u32) -> uint32x2_t; - fn vmov_n_u64(value: u64) -> uint64x1_t; - fn 
vmov_n_p8(value: p8) -> poly8x8_t; - fn vmov_n_p16(value: p16) -> poly16x4_t; - fn vmov_n_f32(value: f32) -> float32x2_t; - fn vmovq_n_s8(value: i8) -> int8x16_t; - fn vmovq_n_s16(value: i16) -> int16x8_t; - fn vmovq_n_s32(value: i32) -> int32x4_t; - fn vmovq_n_s64(value: i64) -> int64x2_t; - fn vmovq_n_u8(value: u8) -> uint8x16_t; - fn vmovq_n_u16(value: u16) -> uint16x8_t; - fn vmovq_n_u32(value: u32) -> uint32x4_t; - fn vmovq_n_u64(value: u64) -> uint64x2_t; - fn vmovq_n_p8(value: p8) -> poly8x16_t; - fn vmovq_n_p16(value: p16) -> poly16x8_t; - fn vmovq_n_f32(value: f32) -> float32x4_t; - fn vext_s64(a: int64x1_t, _b: int64x1_t) -> int64x1_t; - fn vext_u64(a: uint64x1_t, _b: uint64x1_t) -> uint64x1_t; - fn vcnt_s8(a: int8x8_t) -> int8x8_t; - fn vcntq_s8(a: int8x16_t) -> int8x16_t; - fn vcnt_u8(a: uint8x8_t) -> uint8x8_t; - fn vcntq_u8(a: uint8x16_t) -> uint8x16_t; - fn vcnt_p8(a: poly8x8_t) -> poly8x8_t; - fn vcntq_p8(a: poly8x16_t) -> poly8x16_t; - fn vrev16_s8(a: int8x8_t) -> int8x8_t; - fn vrev16q_s8(a: int8x16_t) -> int8x16_t; - fn vrev16_u8(a: uint8x8_t) -> uint8x8_t; - fn vrev16q_u8(a: uint8x16_t) -> uint8x16_t; - fn vrev16_p8(a: poly8x8_t) -> poly8x8_t; - fn vrev16q_p8(a: poly8x16_t) -> poly8x16_t; - fn vrev32_s8(a: int8x8_t) -> int8x8_t; - fn vrev32q_s8(a: int8x16_t) -> int8x16_t; - fn vrev32_u8(a: uint8x8_t) -> uint8x8_t; - fn vrev32q_u8(a: uint8x16_t) -> uint8x16_t; - fn vrev32_s16(a: int16x4_t) -> int16x4_t; - fn vrev32q_s16(a: int16x8_t) -> int16x8_t; - fn vrev32_p16(a: poly16x4_t) -> poly16x4_t; - fn vrev32q_p16(a: poly16x8_t) -> poly16x8_t; - fn vrev32_u16(a: uint16x4_t) -> uint16x4_t; - fn vrev32q_u16(a: uint16x8_t) -> uint16x8_t; - fn vrev32_p8(a: poly8x8_t) -> poly8x8_t; - fn vrev32q_p8(a: poly8x16_t) -> poly8x16_t; - fn vrev64_s8(a: int8x8_t) -> int8x8_t; - fn vrev64q_s8(a: int8x16_t) -> int8x16_t; - fn vrev64_s16(a: int16x4_t) -> int16x4_t; - fn vrev64q_s16(a: int16x8_t) -> int16x8_t; - fn vrev64_s32(a: int32x2_t) -> int32x2_t; - fn 
vrev64q_s32(a: int32x4_t) -> int32x4_t; - fn vrev64_u8(a: uint8x8_t) -> uint8x8_t; - fn vrev64q_u8(a: uint8x16_t) -> uint8x16_t; - fn vrev64_u16(a: uint16x4_t) -> uint16x4_t; - fn vrev64q_u16(a: uint16x8_t) -> uint16x8_t; - fn vrev64_u32(a: uint32x2_t) -> uint32x2_t; - fn vrev64q_u32(a: uint32x4_t) -> uint32x4_t; - fn vrev64_f32(a: float32x2_t) -> float32x2_t; - fn vrev64q_f32(a: float32x4_t) -> float32x4_t; - fn vrev64_p8(a: poly8x8_t) -> poly8x8_t; - fn vrev64q_p8(a: poly8x16_t) -> poly8x16_t; - fn vrev64_p16(a: poly16x4_t) -> poly16x4_t; - fn vrev64q_p16(a: poly16x8_t) -> poly16x8_t; - fn vpadal_s8(a: int16x4_t, b: int8x8_t) -> int16x4_t; - fn vpadal_s16(a: int32x2_t, b: int16x4_t) -> int32x2_t; - fn vpadal_s32(a: int64x1_t, b: int32x2_t) -> int64x1_t; - fn vpadalq_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t; - fn vpadalq_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t; - fn vpadalq_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t; - fn vpadal_u8(a: uint16x4_t, b: uint8x8_t) -> uint16x4_t; - fn vpadal_u16(a: uint32x2_t, b: uint16x4_t) -> uint32x2_t; - fn vpadal_u32(a: uint64x1_t, b: uint32x2_t) -> uint64x1_t; - fn vpadalq_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t; - fn vpadalq_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t; - fn vpadalq_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t; - fn vcombine_f32(low: float32x2_t, high: float32x2_t) -> float32x4_t; - fn vcombine_p8(low: poly8x8_t, high: poly8x8_t) -> poly8x16_t; - fn vcombine_p16(low: poly16x4_t, high: poly16x4_t) -> poly16x8_t; - fn vcombine_s8(low: int8x8_t, high: int8x8_t) -> int8x16_t; - fn vcombine_s16(low: int16x4_t, high: int16x4_t) -> int16x8_t; - fn vcombine_s32(low: int32x2_t, high: int32x2_t) -> int32x4_t; - fn vcombine_s64(low: int64x1_t, high: int64x1_t) -> int64x2_t; - fn vcombine_u8(low: uint8x8_t, high: uint8x8_t) -> uint8x16_t; - fn vcombine_u16(low: uint16x4_t, high: uint16x4_t) -> uint16x8_t; - fn vcombine_u32(low: uint32x2_t, high: uint32x2_t) -> uint32x4_t; - fn vcombine_u64(low: 
uint64x1_t, high: uint64x1_t) -> uint64x2_t; - fn vcombine_p64(low: poly64x1_t, high: poly64x1_t) -> poly64x2_t; - - fn vabd_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t; - fn vabdq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t; - fn vabds_f32(a: f32, b: f32) -> f32; - fn vabdd_f64(a: f64, b: f64) -> f64; - fn vabdl_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t; - fn vabdl_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t; - fn vabdl_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t; - fn vabdl_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t; - fn vabdl_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t; - fn vabdl_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t; - fn vceq_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t; - fn vceqq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t; - fn vceq_s64(a: int64x1_t, b: int64x1_t) -> uint64x1_t; - fn vceqq_s64(a: int64x2_t, b: int64x2_t) -> uint64x2_t; - fn vceq_p64(a: poly64x1_t, b: poly64x1_t) -> uint64x1_t; - fn vceqq_p64(a: poly64x2_t, b: poly64x2_t) -> uint64x2_t; - fn vceq_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t; - fn vceqq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t; - fn vceqd_s64(a: i64, b: i64) -> u64; - fn vceqd_u64(a: u64, b: u64) -> u64; - fn vceqs_f32(a: f32, b: f32) -> u32; - fn vceqd_f64(a: f64, b: f64) -> u64; - fn vceqz_s8(a: int8x8_t) -> uint8x8_t; - fn vceqzq_s8(a: int8x16_t) -> uint8x16_t; - fn vceqz_s16(a: int16x4_t) -> uint16x4_t; - fn vceqzq_s16(a: int16x8_t) -> uint16x8_t; - fn vceqz_s32(a: int32x2_t) -> uint32x2_t; - fn vceqzq_s32(a: int32x4_t) -> uint32x4_t; - fn vceqz_s64(a: int64x1_t) -> uint64x1_t; - fn vceqzq_s64(a: int64x2_t) -> uint64x2_t; - fn vceqz_p8(a: poly8x8_t) -> uint8x8_t; - fn vceqzq_p8(a: poly8x16_t) -> uint8x16_t; - fn vceqz_p64(a: poly64x1_t) -> uint64x1_t; - fn vceqzq_p64(a: poly64x2_t) -> uint64x2_t; - fn vceqz_u8(a: uint8x8_t) -> uint8x8_t; - fn vceqzq_u8(a: uint8x16_t) -> uint8x16_t; - fn vceqz_u16(a: uint16x4_t) -> uint16x4_t; - fn 
vceqzq_u16(a: uint16x8_t) -> uint16x8_t; - fn vceqz_u32(a: uint32x2_t) -> uint32x2_t; - fn vceqzq_u32(a: uint32x4_t) -> uint32x4_t; - fn vceqz_u64(a: uint64x1_t) -> uint64x1_t; - fn vceqzq_u64(a: uint64x2_t) -> uint64x2_t; - fn vceqz_f32(a: float32x2_t) -> uint32x2_t; - fn vceqzq_f32(a: float32x4_t) -> uint32x4_t; - fn vceqz_f64(a: float64x1_t) -> uint64x1_t; - fn vceqzq_f64(a: float64x2_t) -> uint64x2_t; - fn vceqzd_s64(a: i64) -> u64; - fn vceqzd_u64(a: u64) -> u64; - fn vceqzs_f32(a: f32) -> u32; - fn vceqzd_f64(a: f64) -> u64; - fn vtst_s64(a: int64x1_t, b: int64x1_t) -> uint64x1_t; - fn vtstq_s64(a: int64x2_t, b: int64x2_t) -> uint64x2_t; - fn vtst_p64(a: poly64x1_t, b: poly64x1_t) -> uint64x1_t; - fn vtstq_p64(a: poly64x2_t, b: poly64x2_t) -> uint64x2_t; - fn vtst_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t; - fn vtstq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t; - fn vtstd_s64(a: i64, b: i64) -> u64; - fn vtstd_u64(a: u64, b: u64) -> u64; - fn vuqadds_s32(a: i32, b: u32) -> i32; - fn vuqaddd_s64(a: i64, b: u64) -> i64; - fn vuqaddb_s8(a: i8, b: u8) -> i8; - fn vuqaddh_s16(a: i16, b: u16) -> i16; - fn vabs_f64(a: float64x1_t) -> float64x1_t; - fn vabsq_f64(a: float64x2_t) -> float64x2_t; - fn vcgt_s64(a: int64x1_t, b: int64x1_t) -> uint64x1_t; - fn vcgtq_s64(a: int64x2_t, b: int64x2_t) -> uint64x2_t; - fn vcgt_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t; - fn vcgtq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t; - fn vcgt_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t; - fn vcgtq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t; - fn vcgtd_s64(a: i64, b: i64) -> u64; - fn vcgtd_u64(a: u64, b: u64) -> u64; - fn vcgts_f32(a: f32, b: f32) -> u32; - fn vcgtd_f64(a: f64, b: f64) -> u64; - fn vclt_s64(a: int64x1_t, b: int64x1_t) -> uint64x1_t; - fn vcltq_s64(a: int64x2_t, b: int64x2_t) -> uint64x2_t; - fn vclt_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t; - fn vcltq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t; - fn vclt_f64(a: float64x1_t, 
b: float64x1_t) -> uint64x1_t; - fn vcltq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t; - fn vcltd_s64(a: i64, b: i64) -> u64; - fn vcltd_u64(a: u64, b: u64) -> u64; - fn vclts_f32(a: f32, b: f32) -> u32; - fn vcltd_f64(a: f64, b: f64) -> u64; - fn vcle_s64(a: int64x1_t, b: int64x1_t) -> uint64x1_t; - fn vcleq_s64(a: int64x2_t, b: int64x2_t) -> uint64x2_t; - fn vcged_s64(a: i64, b: i64) -> u64; - fn vcged_u64(a: u64, b: u64) -> u64; - fn vcges_f32(a: f32, b: f32) -> u32; - fn vcged_f64(a: f64, b: f64) -> u64; - fn vcle_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t; - fn vcleq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t; - fn vcle_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t; - fn vcleq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t; - fn vcled_s64(a: i64, b: i64) -> u64; - fn vcled_u64(a: u64, b: u64) -> u64; - fn vcles_f32(a: f32, b: f32) -> u32; - fn vcled_f64(a: f64, b: f64) -> u64; - fn vcge_s64(a: int64x1_t, b: int64x1_t) -> uint64x1_t; - fn vcgeq_s64(a: int64x2_t, b: int64x2_t) -> uint64x2_t; - fn vcge_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t; - fn vcgeq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t; - fn vcge_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t; - fn vcgeq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t; - fn vcgez_s8(a: int8x8_t) -> uint8x8_t; - fn vcgezq_s8(a: int8x16_t) -> uint8x16_t; - fn vcgez_s16(a: int16x4_t) -> uint16x4_t; - fn vcgezq_s16(a: int16x8_t) -> uint16x8_t; - fn vcgez_s32(a: int32x2_t) -> uint32x2_t; - fn vcgezq_s32(a: int32x4_t) -> uint32x4_t; - fn vcgez_s64(a: int64x1_t) -> uint64x1_t; - fn vcgezq_s64(a: int64x2_t) -> uint64x2_t; - fn vcgez_f32(a: float32x2_t) -> uint32x2_t; - fn vcgezq_f32(a: float32x4_t) -> uint32x4_t; - fn vcgez_f64(a: float64x1_t) -> uint64x1_t; - fn vcgezq_f64(a: float64x2_t) -> uint64x2_t; - fn vcgezd_s64(a: i64) -> u64; - fn vcgezs_f32(a: f32) -> u32; - fn vcgezd_f64(a: f64) -> u64; - fn vcgtz_s8(a: int8x8_t) -> uint8x8_t; - fn vcgtzq_s8(a: int8x16_t) -> uint8x16_t; - fn 
vcgtz_s16(a: int16x4_t) -> uint16x4_t; - fn vcgtzq_s16(a: int16x8_t) -> uint16x8_t; - fn vcgtz_s32(a: int32x2_t) -> uint32x2_t; - fn vcgtzq_s32(a: int32x4_t) -> uint32x4_t; - fn vcgtz_s64(a: int64x1_t) -> uint64x1_t; - fn vcgtzq_s64(a: int64x2_t) -> uint64x2_t; - fn vcgtz_f32(a: float32x2_t) -> uint32x2_t; - fn vcgtzq_f32(a: float32x4_t) -> uint32x4_t; - fn vcgtz_f64(a: float64x1_t) -> uint64x1_t; - fn vcgtzq_f64(a: float64x2_t) -> uint64x2_t; - fn vcgtzd_s64(a: i64) -> u64; - fn vcgtzs_f32(a: f32) -> u32; - fn vcgtzd_f64(a: f64) -> u64; - fn vclez_s8(a: int8x8_t) -> uint8x8_t; - fn vclezq_s8(a: int8x16_t) -> uint8x16_t; - fn vclez_s16(a: int16x4_t) -> uint16x4_t; - fn vclezq_s16(a: int16x8_t) -> uint16x8_t; - fn vclez_s32(a: int32x2_t) -> uint32x2_t; - fn vclezq_s32(a: int32x4_t) -> uint32x4_t; - fn vclez_s64(a: int64x1_t) -> uint64x1_t; - fn vclezq_s64(a: int64x2_t) -> uint64x2_t; - fn vclez_f32(a: float32x2_t) -> uint32x2_t; - fn vclezq_f32(a: float32x4_t) -> uint32x4_t; - fn vclez_f64(a: float64x1_t) -> uint64x1_t; - fn vclezq_f64(a: float64x2_t) -> uint64x2_t; - fn vclezd_s64(a: i64) -> u64; - fn vclezs_f32(a: f32) -> u32; - fn vclezd_f64(a: f64) -> u64; - fn vcltz_s8(a: int8x8_t) -> uint8x8_t; - fn vcltzq_s8(a: int8x16_t) -> uint8x16_t; - fn vcltz_s16(a: int16x4_t) -> uint16x4_t; - fn vcltzq_s16(a: int16x8_t) -> uint16x8_t; - fn vcltz_s32(a: int32x2_t) -> uint32x2_t; - fn vcltzq_s32(a: int32x4_t) -> uint32x4_t; - fn vcltz_s64(a: int64x1_t) -> uint64x1_t; - fn vcltzq_s64(a: int64x2_t) -> uint64x2_t; - fn vcltz_f32(a: float32x2_t) -> uint32x2_t; - fn vcltzq_f32(a: float32x4_t) -> uint32x4_t; - fn vcltz_f64(a: float64x1_t) -> uint64x1_t; - fn vcltzq_f64(a: float64x2_t) -> uint64x2_t; - fn vcltzd_s64(a: i64) -> u64; - fn vcltzs_f32(a: f32) -> u32; - fn vcltzd_f64(a: f64) -> u64; - fn vcagt_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t; - fn vcagtq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t; - fn vcagts_f32(a: f32, b: f32) -> u32; - fn vcagtd_f64(a: 
f64, b: f64) -> u64; - fn vcage_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t; - fn vcageq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t; - fn vcages_f32(a: f32, b: f32) -> u32; - fn vcaged_f64(a: f64, b: f64) -> u64; - fn vcalt_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t; - fn vcaltq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t; - fn vcalts_f32(a: f32, b: f32) -> u32; - fn vcaltd_f64(a: f64, b: f64) -> u64; - fn vcale_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t; - fn vcaleq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t; - fn vcales_f32(a: f32, b: f32) -> u32; - fn vcaled_f64(a: f64, b: f64) -> u64; - fn vcopy_lane_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; - fn vcopyq_laneq_s8( - a: int8x16_t, - b: int8x16_t, - ) -> int8x16_t; - fn vcopy_lane_s16( - a: int16x4_t, - b: int16x4_t, - ) -> int16x4_t; - fn vcopyq_laneq_s16( - a: int16x8_t, - b: int16x8_t, - ) -> int16x8_t; - fn vcopy_lane_s32( - a: int32x2_t, - b: int32x2_t, - ) -> int32x2_t; - fn vcopyq_laneq_s32( - a: int32x4_t, - b: int32x4_t, - ) -> int32x4_t; - fn vcopyq_laneq_s64( - a: int64x2_t, - b: int64x2_t, - ) -> int64x2_t; - fn vcopy_lane_u8( - a: uint8x8_t, - b: uint8x8_t, - ) -> uint8x8_t; - fn vcopyq_laneq_u8( - a: uint8x16_t, - b: uint8x16_t, - ) -> uint8x16_t; - fn vcopy_lane_u16( - a: uint16x4_t, - b: uint16x4_t, - ) -> uint16x4_t; - fn vcopyq_laneq_u16( - a: uint16x8_t, - b: uint16x8_t, - ) -> uint16x8_t; - fn vcopy_lane_u32( - a: uint32x2_t, - b: uint32x2_t, - ) -> uint32x2_t; - fn vcopyq_laneq_u32( - a: uint32x4_t, - b: uint32x4_t, - ) -> uint32x4_t; - fn vcopyq_laneq_u64( - a: uint64x2_t, - b: uint64x2_t, - ) -> uint64x2_t; - fn vcopy_lane_p8( - a: poly8x8_t, - b: poly8x8_t, - ) -> poly8x8_t; - fn vcopyq_laneq_p8( - a: poly8x16_t, - b: poly8x16_t, - ) -> poly8x16_t; - fn vcopy_lane_p16( - a: poly16x4_t, - b: poly16x4_t, - ) -> poly16x4_t; - fn vcopyq_laneq_p16( - a: poly16x8_t, - b: poly16x8_t, - ) -> poly16x8_t; - fn vcopyq_laneq_p64( - a: poly64x2_t, - b: poly64x2_t, 
- ) -> poly64x2_t; - fn vcopy_lane_f32( - a: float32x2_t, - b: float32x2_t, - ) -> float32x2_t; - fn vcopyq_laneq_f32( - a: float32x4_t, - b: float32x4_t, - ) -> float32x4_t; - fn vcopyq_laneq_f64( - a: float64x2_t, - b: float64x2_t, - ) -> float64x2_t; - fn vcopy_laneq_s8( - a: int8x8_t, - b: int8x16_t, - ) -> int8x8_t; - fn vcopy_laneq_s16( - a: int16x4_t, - b: int16x8_t, - ) -> int16x4_t; - fn vcopy_laneq_s32( - a: int32x2_t, - b: int32x4_t, - ) -> int32x2_t; - fn vcopy_laneq_u8( - a: uint8x8_t, - b: uint8x16_t, - ) -> uint8x8_t; - fn vcopy_laneq_u16( - a: uint16x4_t, - b: uint16x8_t, - ) -> uint16x4_t; - fn vcopy_laneq_u32( - a: uint32x2_t, - b: uint32x4_t, - ) -> uint32x2_t; - fn vcopy_laneq_p8( - a: poly8x8_t, - b: poly8x16_t, - ) -> poly8x8_t; - fn vcopy_laneq_p16( - a: poly16x4_t, - b: poly16x8_t, - ) -> poly16x4_t; - fn vcopy_laneq_f32( - a: float32x2_t, - b: float32x4_t, - ) -> float32x2_t; - fn vcopyq_lane_s8( - a: int8x16_t, - b: int8x8_t, - ) -> int8x16_t; - fn vcopyq_lane_s16( - a: int16x8_t, - b: int16x4_t, - ) -> int16x8_t; - fn vcopyq_lane_s32( - a: int32x4_t, - b: int32x2_t, - ) -> int32x4_t; - fn vcopyq_lane_u8( - a: uint8x16_t, - b: uint8x8_t, - ) -> uint8x16_t; - fn vcopyq_lane_u16( - a: uint16x8_t, - b: uint16x4_t, - ) -> uint16x8_t; - fn vcopyq_lane_u32( - a: uint32x4_t, - b: uint32x2_t, - ) -> uint32x4_t; - fn vcopyq_lane_p8( - a: poly8x16_t, - b: poly8x8_t, - ) -> poly8x16_t; - fn vcopyq_lane_p16( - a: poly16x8_t, - b: poly16x4_t, - ) -> poly16x8_t; - fn vcopyq_lane_s64( - a: int64x2_t, - b: int64x1_t, - ) -> int64x2_t; - fn vcopyq_lane_u64( - a: uint64x2_t, - b: uint64x1_t, - ) -> uint64x2_t; - fn vcopyq_lane_p64( - a: poly64x2_t, - b: poly64x1_t, - ) -> poly64x2_t; - fn vcopyq_lane_f32( - a: float32x4_t, - b: float32x2_t, - ) -> float32x4_t; - fn vcopyq_lane_f64( - a: float64x2_t, - b: float64x1_t, - ) -> float64x2_t; - fn vcreate_f64(a: u64) -> float64x1_t; - fn vcvt_f64_s64(a: int64x1_t) -> float64x1_t; - fn vcvtq_f64_s64(a: int64x2_t) 
-> float64x2_t; - fn vcvt_f64_u64(a: uint64x1_t) -> float64x1_t; - fn vcvtq_f64_u64(a: uint64x2_t) -> float64x2_t; - fn vcvt_f64_f32(a: float32x2_t) -> float64x2_t; - fn vcvt_high_f64_f32(a: float32x4_t) -> float64x2_t; - fn vcvt_f32_f64(a: float64x2_t) -> float32x2_t; - fn vcvt_high_f32_f64(a: float32x2_t, b: float64x2_t) -> float32x4_t; - fn vcvtx_f32_f64(a: float64x2_t) -> float32x2_t; - fn vcvtxd_f32_f64(a: f64) -> f32; - fn vcvtx_high_f32_f64(a: float32x2_t, b: float64x2_t) -> float32x4_t; - fn vcvt_n_f64_s64(a: int64x1_t) -> float64x1_t; - fn vcvtq_n_f64_s64(a: int64x2_t) -> float64x2_t; - fn vcvts_n_f32_s32(a: i32) -> f32; - fn vcvtd_n_f64_s64(a: i64) -> f64; - fn vcvt_n_f64_u64(a: uint64x1_t) -> float64x1_t; - fn vcvtq_n_f64_u64(a: uint64x2_t) -> float64x2_t; - fn vcvts_n_f32_u32(a: u32) -> f32; - fn vcvtd_n_f64_u64(a: u64) -> f64; - fn vcvt_n_s64_f64(a: float64x1_t) -> int64x1_t; - fn vcvtq_n_s64_f64(a: float64x2_t) -> int64x2_t; - fn vcvts_n_s32_f32(a: f32) -> i32; - fn vcvtd_n_s64_f64(a: f64) -> i64; - fn vcvt_n_u64_f64(a: float64x1_t) -> uint64x1_t; - fn vcvtq_n_u64_f64(a: float64x2_t) -> uint64x2_t; - fn vcvts_n_u32_f32(a: f32) -> u32; - fn vcvtd_n_u64_f64(a: f64) -> u64; - fn vcvts_f32_s32(a: i32) -> f32; - fn vcvtd_f64_s64(a: i64) -> f64; - fn vcvts_f32_u32(a: u32) -> f32; - fn vcvtd_f64_u64(a: u64) -> f64; - fn vcvts_s32_f32(a: f32) -> i32; - fn vcvtd_s64_f64(a: f64) -> i64; - fn vcvts_u32_f32(a: f32) -> u32; - fn vcvtd_u64_f64(a: f64) -> u64; - fn vcvt_s64_f64(a: float64x1_t) -> int64x1_t; - fn vcvtq_s64_f64(a: float64x2_t) -> int64x2_t; - fn vcvt_u64_f64(a: float64x1_t) -> uint64x1_t; - fn vcvtq_u64_f64(a: float64x2_t) -> uint64x2_t; - fn vcvta_s32_f32(a: float32x2_t) -> int32x2_t; - fn vcvtaq_s32_f32(a: float32x4_t) -> int32x4_t; - fn vcvta_s64_f64(a: float64x1_t) -> int64x1_t; - fn vcvtaq_s64_f64(a: float64x2_t) -> int64x2_t; - fn vcvtas_s32_f32(a: f32) -> i32; - fn vcvtad_s64_f64(a: f64) -> i64; - fn vcvtas_u32_f32(a: f32) -> u32; - fn 
vcvtad_u64_f64(a: f64) -> u64; - fn vcvtn_s32_f32(a: float32x2_t) -> int32x2_t; - fn vcvtnq_s32_f32(a: float32x4_t) -> int32x4_t; - fn vcvtn_s64_f64(a: float64x1_t) -> int64x1_t; - fn vcvtnq_s64_f64(a: float64x2_t) -> int64x2_t; - fn vcvtns_s32_f32(a: f32) -> i32; - fn vcvtnd_s64_f64(a: f64) -> i64; - fn vcvtm_s32_f32(a: float32x2_t) -> int32x2_t; - fn vcvtmq_s32_f32(a: float32x4_t) -> int32x4_t; - fn vcvtm_s64_f64(a: float64x1_t) -> int64x1_t; - fn vcvtmq_s64_f64(a: float64x2_t) -> int64x2_t; - fn vcvtms_s32_f32(a: f32) -> i32; - fn vcvtmd_s64_f64(a: f64) -> i64; - fn vcvtp_s32_f32(a: float32x2_t) -> int32x2_t; - fn vcvtpq_s32_f32(a: float32x4_t) -> int32x4_t; - fn vcvtp_s64_f64(a: float64x1_t) -> int64x1_t; - fn vcvtpq_s64_f64(a: float64x2_t) -> int64x2_t; - fn vcvtps_s32_f32(a: f32) -> i32; - fn vcvtpd_s64_f64(a: f64) -> i64; - fn vcvta_u32_f32(a: float32x2_t) -> uint32x2_t; - fn vcvtaq_u32_f32(a: float32x4_t) -> uint32x4_t; - fn vcvta_u64_f64(a: float64x1_t) -> uint64x1_t; - fn vcvtaq_u64_f64(a: float64x2_t) -> uint64x2_t; - fn vcvtn_u32_f32(a: float32x2_t) -> uint32x2_t; - fn vcvtnq_u32_f32(a: float32x4_t) -> uint32x4_t; - fn vcvtn_u64_f64(a: float64x1_t) -> uint64x1_t; - fn vcvtnq_u64_f64(a: float64x2_t) -> uint64x2_t; - fn vcvtns_u32_f32(a: f32) -> u32; - fn vcvtnd_u64_f64(a: f64) -> u64; - fn vcvtm_u32_f32(a: float32x2_t) -> uint32x2_t; - fn vcvtmq_u32_f32(a: float32x4_t) -> uint32x4_t; - fn vcvtm_u64_f64(a: float64x1_t) -> uint64x1_t; - fn vcvtmq_u64_f64(a: float64x2_t) -> uint64x2_t; - fn vcvtms_u32_f32(a: f32) -> u32; - fn vcvtmd_u64_f64(a: f64) -> u64; - fn vcvtp_u32_f32(a: float32x2_t) -> uint32x2_t; - fn vcvtpq_u32_f32(a: float32x4_t) -> uint32x4_t; - fn vcvtp_u64_f64(a: float64x1_t) -> uint64x1_t; - fn vcvtpq_u64_f64(a: float64x2_t) -> uint64x2_t; - fn vcvtps_u32_f32(a: f32) -> u32; - fn vcvtpd_u64_f64(a: f64) -> u64; - fn vdupq_laneq_p64(a: poly64x2_t) -> poly64x2_t; - fn vdupq_lane_p64(a: poly64x1_t) -> poly64x2_t; - fn vdupq_laneq_f64(a: 
float64x2_t) -> float64x2_t; - fn vdupq_lane_f64(a: float64x1_t) -> float64x2_t; - fn vdup_lane_p64(a: poly64x1_t) -> poly64x1_t; - fn vdup_lane_f64(a: float64x1_t) -> float64x1_t; - fn vdup_laneq_p64(a: poly64x2_t) -> poly64x1_t; - fn vdup_laneq_f64(a: float64x2_t) -> float64x1_t; - fn vdupb_lane_s8(a: int8x8_t) -> i8; - fn vdupb_laneq_s8(a: int8x16_t) -> i8; - fn vduph_lane_s16(a: int16x4_t) -> i16; - fn vduph_laneq_s16(a: int16x8_t) -> i16; - fn vdups_lane_s32(a: int32x2_t) -> i32; - fn vdups_laneq_s32(a: int32x4_t) -> i32; - fn vdupd_lane_s64(a: int64x1_t) -> i64; - fn vdupd_laneq_s64(a: int64x2_t) -> i64; - fn vdupb_lane_u8(a: uint8x8_t) -> u8; - fn vdupb_laneq_u8(a: uint8x16_t) -> u8; - fn vduph_lane_u16(a: uint16x4_t) -> u16; - fn vduph_laneq_u16(a: uint16x8_t) -> u16; - fn vdups_lane_u32(a: uint32x2_t) -> u32; - fn vdups_laneq_u32(a: uint32x4_t) -> u32; - fn vdupd_lane_u64(a: uint64x1_t) -> u64; - fn vdupd_laneq_u64(a: uint64x2_t) -> u64; - fn vdupb_lane_p8(a: poly8x8_t) -> p8; - fn vdupb_laneq_p8(a: poly8x16_t) -> p8; - fn vduph_lane_p16(a: poly16x4_t) -> p16; - fn vduph_laneq_p16(a: poly16x8_t) -> p16; - fn vdups_lane_f32(a: float32x2_t) -> f32; - fn vdups_laneq_f32(a: float32x4_t) -> f32; - fn vdupd_lane_f64(a: float64x1_t) -> f64; - fn vdupd_laneq_f64(a: float64x2_t) -> f64; - fn vextq_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t; - fn vextq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t; - fn vmla_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t; - fn vmlaq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t; - fn vmlal_high_s8(a: int16x8_t, b: int8x16_t, c: int8x16_t) -> int16x8_t; - fn vmlal_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t; - fn vmlal_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t; - fn vmlal_high_u8(a: uint16x8_t, b: uint8x16_t, c: uint8x16_t) -> uint16x8_t; - fn vmlal_high_u16(a: uint32x4_t, b: uint16x8_t, c: uint16x8_t) -> uint32x4_t; - fn vmlal_high_u32(a: 
uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uint64x2_t; - fn vmlal_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t; - fn vmlal_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t; - fn vmlal_high_n_u16(a: uint32x4_t, b: uint16x8_t, c: u16) -> uint32x4_t; - fn vmlal_high_n_u32(a: uint64x2_t, b: uint32x4_t, c: u32) -> uint64x2_t; - fn vmlal_high_lane_s16( - a: int32x4_t, - b: int16x8_t, - c: int16x4_t, - ) -> int32x4_t; - fn vmlal_high_laneq_s16( - a: int32x4_t, - b: int16x8_t, - c: int16x8_t, - ) -> int32x4_t; - fn vmlal_high_lane_s32( - a: int64x2_t, - b: int32x4_t, - c: int32x2_t, - ) -> int64x2_t; - fn vmlal_high_laneq_s32( - a: int64x2_t, - b: int32x4_t, - c: int32x4_t, - ) -> int64x2_t; - fn vmlal_high_lane_u16( - a: uint32x4_t, - b: uint16x8_t, - c: uint16x4_t, - ) -> uint32x4_t; - fn vmlal_high_laneq_u16( - a: uint32x4_t, - b: uint16x8_t, - c: uint16x8_t, - ) -> uint32x4_t; - fn vmlal_high_lane_u32( - a: uint64x2_t, - b: uint32x4_t, - c: uint32x2_t, - ) -> uint64x2_t; - fn vmlal_high_laneq_u32( - a: uint64x2_t, - b: uint32x4_t, - c: uint32x4_t, - ) -> uint64x2_t; - fn vmls_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t; - fn vmlsq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t; - fn vmlsl_high_s8(a: int16x8_t, b: int8x16_t, c: int8x16_t) -> int16x8_t; - fn vmlsl_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t; - fn vmlsl_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t; - fn vmlsl_high_u8(a: uint16x8_t, b: uint8x16_t, c: uint8x16_t) -> uint16x8_t; - fn vmlsl_high_u16(a: uint32x4_t, b: uint16x8_t, c: uint16x8_t) -> uint32x4_t; - fn vmlsl_high_u32(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uint64x2_t; - fn vmlsl_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t; - fn vmlsl_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t; - fn vmlsl_high_n_u16(a: uint32x4_t, b: uint16x8_t, c: u16) -> uint32x4_t; - fn vmlsl_high_n_u32(a: uint64x2_t, b: 
uint32x4_t, c: u32) -> uint64x2_t; - fn vmlsl_high_lane_s16( - a: int32x4_t, - b: int16x8_t, - c: int16x4_t, - ) -> int32x4_t; - fn vmlsl_high_laneq_s16( - a: int32x4_t, - b: int16x8_t, - c: int16x8_t, - ) -> int32x4_t; - fn vmlsl_high_lane_s32( - a: int64x2_t, - b: int32x4_t, - c: int32x2_t, - ) -> int64x2_t; - fn vmlsl_high_laneq_s32( - a: int64x2_t, - b: int32x4_t, - c: int32x4_t, - ) -> int64x2_t; - fn vmlsl_high_lane_u16( - a: uint32x4_t, - b: uint16x8_t, - c: uint16x4_t, - ) -> uint32x4_t; - fn vmlsl_high_laneq_u16( - a: uint32x4_t, - b: uint16x8_t, - c: uint16x8_t, - ) -> uint32x4_t; - fn vmlsl_high_lane_u32( - a: uint64x2_t, - b: uint32x4_t, - c: uint32x2_t, - ) -> uint64x2_t; - fn vmlsl_high_laneq_u32( - a: uint64x2_t, - b: uint32x4_t, - c: uint32x4_t, - ) -> uint64x2_t; - fn vmovn_high_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t; - fn vmovn_high_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t; - fn vmovn_high_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t; - fn vmovn_high_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t; - fn vmovn_high_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t; - fn vmovn_high_u64(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t; - fn vneg_s64(a: int64x1_t) -> int64x1_t; - fn vnegq_s64(a: int64x2_t) -> int64x2_t; - fn vnegd_s64(a: i64) -> i64; - fn vneg_f64(a: float64x1_t) -> float64x1_t; - fn vnegq_f64(a: float64x2_t) -> float64x2_t; - fn vqneg_s64(a: int64x1_t) -> int64x1_t; - fn vqnegq_s64(a: int64x2_t) -> int64x2_t; - fn vqnegb_s8(a: i8) -> i8; - fn vqnegh_s16(a: i16) -> i16; - fn vqnegs_s32(a: i32) -> i32; - fn vqnegd_s64(a: i64) -> i64; - fn vqsubb_s8(a: i8, b: i8) -> i8; - fn vqsubh_s16(a: i16, b: i16) -> i16; - fn vqsubb_u8(a: u8, b: u8) -> u8; - fn vqsubh_u16(a: u16, b: u16) -> u16; - fn vqsubs_u32(a: u32, b: u32) -> u32; - fn vqsubd_u64(a: u64, b: u64) -> u64; - fn vqsubs_s32(a: i32, b: i32) -> i32; - fn vqsubd_s64(a: i64, b: i64) -> i64; - fn vrbit_s8(a: int8x8_t) -> int8x8_t; - fn vrbitq_s8(a: int8x16_t) -> int8x16_t; - fn 
vrbit_u8(a: uint8x8_t) -> uint8x8_t; - fn vrbitq_u8(a: uint8x16_t) -> uint8x16_t; - fn vrbit_p8(a: poly8x8_t) -> poly8x8_t; - fn vrbitq_p8(a: poly8x16_t) -> poly8x16_t; - fn vrndx_f32(a: float32x2_t) -> float32x2_t; - fn vrndxq_f32(a: float32x4_t) -> float32x4_t; - fn vrndx_f64(a: float64x1_t) -> float64x1_t; - fn vrndxq_f64(a: float64x2_t) -> float64x2_t; - fn vrnda_f32(a: float32x2_t) -> float32x2_t; - fn vrndaq_f32(a: float32x4_t) -> float32x4_t; - fn vrnda_f64(a: float64x1_t) -> float64x1_t; - fn vrndaq_f64(a: float64x2_t) -> float64x2_t; - fn vrndn_f64(a: float64x1_t) -> float64x1_t; - fn vrndnq_f64(a: float64x2_t) -> float64x2_t; - fn vrndns_f32(a: f32) -> f32; - fn vrndm_f32(a: float32x2_t) -> float32x2_t; - fn vrndmq_f32(a: float32x4_t) -> float32x4_t; - fn vrndm_f64(a: float64x1_t) -> float64x1_t; - fn vrndmq_f64(a: float64x2_t) -> float64x2_t; - fn vrndp_f32(a: float32x2_t) -> float32x2_t; - fn vrndpq_f32(a: float32x4_t) -> float32x4_t; - fn vrndp_f64(a: float64x1_t) -> float64x1_t; - fn vrndpq_f64(a: float64x2_t) -> float64x2_t; - fn vrnd_f32(a: float32x2_t) -> float32x2_t; - fn vrndq_f32(a: float32x4_t) -> float32x4_t; - fn vrnd_f64(a: float64x1_t) -> float64x1_t; - fn vrndq_f64(a: float64x2_t) -> float64x2_t; - fn vrndi_f32(a: float32x2_t) -> float32x2_t; - fn vrndiq_f32(a: float32x4_t) -> float32x4_t; - fn vrndi_f64(a: float64x1_t) -> float64x1_t; - fn vrndiq_f64(a: float64x2_t) -> float64x2_t; - fn vqaddb_s8(a: i8, b: i8) -> i8; - fn vqaddh_s16(a: i16, b: i16) -> i16; - fn vqaddb_u8(a: u8, b: u8) -> u8; - fn vqaddh_u16(a: u16, b: u16) -> u16; - fn vqadds_u32(a: u32, b: u32) -> u32; - fn vqaddd_u64(a: u64, b: u64) -> u64; - fn vqadds_s32(a: i32, b: i32) -> i32; - fn vqaddd_s64(a: i64, b: i64) -> i64; - unsafe fn vld1_f64_x2(a: *const f64) -> float64x1x2_t; - unsafe fn vld1q_f64_x2(a: *const f64) -> float64x2x2_t; - unsafe fn vld1_f64_x3(a: *const f64) -> float64x1x3_t; - unsafe fn vld1q_f64_x3(a: *const f64) -> float64x2x3_t; - unsafe fn 
vld1_f64_x4(a: *const f64) -> float64x1x4_t; - unsafe fn vld1q_f64_x4(a: *const f64) -> float64x2x4_t; - unsafe fn vld2q_s64(a: *const i64) -> int64x2x2_t; - unsafe fn vld2q_u64(a: *const u64) -> uint64x2x2_t; - unsafe fn vld2_f64(a: *const f64) -> float64x1x2_t; - unsafe fn vld2q_f64(a: *const f64) -> float64x2x2_t; - unsafe fn vld2q_dup_s64(a: *const i64) -> int64x2x2_t; - unsafe fn vld2q_dup_u64(a: *const u64) -> uint64x2x2_t; - unsafe fn vld2_dup_f64(a: *const f64) -> float64x1x2_t; - unsafe fn vld2q_dup_f64(a: *const f64) -> float64x2x2_t; - unsafe fn vld2q_lane_s8(a: *const i8, b: int8x16x2_t) -> int8x16x2_t; - unsafe fn vld2_lane_s64(a: *const i64, b: int64x1x2_t) -> int64x1x2_t; - unsafe fn vld2q_lane_s64(a: *const i64, b: int64x2x2_t) -> int64x2x2_t; - unsafe fn vld2q_lane_u8(a: *const u8, b: uint8x16x2_t) -> uint8x16x2_t; - unsafe fn vld2_lane_u64(a: *const u64, b: uint64x1x2_t) -> uint64x1x2_t; - unsafe fn vld2q_lane_u64(a: *const u64, b: uint64x2x2_t) -> uint64x2x2_t; - unsafe fn vld2q_lane_p8(a: *const p8, b: poly8x16x2_t) -> poly8x16x2_t; - unsafe fn vld2_lane_f64(a: *const f64, b: float64x1x2_t) -> float64x1x2_t; - unsafe fn vld2q_lane_f64(a: *const f64, b: float64x2x2_t) - -> float64x2x2_t; - unsafe fn vld3q_s64(a: *const i64) -> int64x2x3_t; - unsafe fn vld3q_u64(a: *const u64) -> uint64x2x3_t; - unsafe fn vld3_f64(a: *const f64) -> float64x1x3_t; - unsafe fn vld3q_f64(a: *const f64) -> float64x2x3_t; - unsafe fn vld3q_dup_s64(a: *const i64) -> int64x2x3_t; - unsafe fn vld3q_dup_u64(a: *const u64) -> uint64x2x3_t; - unsafe fn vld3_dup_f64(a: *const f64) -> float64x1x3_t; - unsafe fn vld3q_dup_f64(a: *const f64) -> float64x2x3_t; - unsafe fn vld3q_lane_s8(a: *const i8, b: int8x16x3_t) -> int8x16x3_t; - unsafe fn vld3_lane_s64(a: *const i64, b: int64x1x3_t) -> int64x1x3_t; - unsafe fn vld3q_lane_s64(a: *const i64, b: int64x2x3_t) -> int64x2x3_t; - unsafe fn vld3q_lane_p8(a: *const p8, b: poly8x16x3_t) -> poly8x16x3_t; - unsafe fn vld3q_lane_u8(a: 
*const u8, b: uint8x16x3_t) -> uint8x16x3_t; - unsafe fn vld3_lane_u64(a: *const u64, b: uint64x1x3_t) -> uint64x1x3_t; - unsafe fn vld3q_lane_u64(a: *const u64, b: uint64x2x3_t) -> uint64x2x3_t; - unsafe fn vld3_lane_f64(a: *const f64, b: float64x1x3_t) -> float64x1x3_t; - unsafe fn vld3q_lane_f64(a: *const f64, b: float64x2x3_t) - -> float64x2x3_t; - unsafe fn vld4q_s64(a: *const i64) -> int64x2x4_t; - unsafe fn vld4q_u64(a: *const u64) -> uint64x2x4_t; - unsafe fn vld4_f64(a: *const f64) -> float64x1x4_t; - unsafe fn vld4q_f64(a: *const f64) -> float64x2x4_t; - unsafe fn vld4q_dup_s64(a: *const i64) -> int64x2x4_t; - unsafe fn vld4q_dup_u64(a: *const u64) -> uint64x2x4_t; - unsafe fn vld4_dup_f64(a: *const f64) -> float64x1x4_t; - unsafe fn vld4q_dup_f64(a: *const f64) -> float64x2x4_t; - unsafe fn vld4q_lane_s8(a: *const i8, b: int8x16x4_t) -> int8x16x4_t; - unsafe fn vld4_lane_s64(a: *const i64, b: int64x1x4_t) -> int64x1x4_t; - unsafe fn vld4q_lane_s64(a: *const i64, b: int64x2x4_t) -> int64x2x4_t; - unsafe fn vld4q_lane_p8(a: *const p8, b: poly8x16x4_t) -> poly8x16x4_t; - unsafe fn vld4q_lane_u8(a: *const u8, b: uint8x16x4_t) -> uint8x16x4_t; - unsafe fn vld4_lane_u64(a: *const u64, b: uint64x1x4_t) -> uint64x1x4_t; - unsafe fn vld4q_lane_u64(a: *const u64, b: uint64x2x4_t) -> uint64x2x4_t; - unsafe fn vld4_lane_f64(a: *const f64, b: float64x1x4_t) -> float64x1x4_t; - unsafe fn vld4q_lane_f64(a: *const f64, b: float64x2x4_t) - -> float64x2x4_t; - unsafe fn vst1_lane_f64(a: *mut f64, b: float64x1_t); - unsafe fn vst1q_lane_f64(a: *mut f64, b: float64x2_t); - unsafe fn vst1_f64_x2(a: *mut f64, b: float64x1x2_t); - unsafe fn vst1q_f64_x2(a: *mut f64, b: float64x2x2_t); - unsafe fn vst1_f64_x3(a: *mut f64, b: float64x1x3_t); - unsafe fn vst1q_f64_x3(a: *mut f64, b: float64x2x3_t); - unsafe fn vst1_f64_x4(a: *mut f64, b: float64x1x4_t); - unsafe fn vst1q_f64_x4(a: *mut f64, b: float64x2x4_t); - unsafe fn vst2q_s64(a: *mut i64, b: int64x2x2_t); - unsafe fn 
vst2q_u64(a: *mut u64, b: uint64x2x2_t); - unsafe fn vst2_f64(a: *mut f64, b: float64x1x2_t); - unsafe fn vst2q_f64(a: *mut f64, b: float64x2x2_t); - unsafe fn vst2q_lane_s8(a: *mut i8, b: int8x16x2_t); - unsafe fn vst2_lane_s64(a: *mut i64, b: int64x1x2_t); - unsafe fn vst2q_lane_s64(a: *mut i64, b: int64x2x2_t); - unsafe fn vst2q_lane_u8(a: *mut u8, b: uint8x16x2_t); - unsafe fn vst2_lane_u64(a: *mut u64, b: uint64x1x2_t); - unsafe fn vst2q_lane_u64(a: *mut u64, b: uint64x2x2_t); - unsafe fn vst2q_lane_p8(a: *mut p8, b: poly8x16x2_t); - unsafe fn vst2_lane_f64(a: *mut f64, b: float64x1x2_t); - unsafe fn vst2q_lane_f64(a: *mut f64, b: float64x2x2_t); - unsafe fn vst3q_s64(a: *mut i64, b: int64x2x3_t); - unsafe fn vst3q_u64(a: *mut u64, b: uint64x2x3_t); - unsafe fn vst3_f64(a: *mut f64, b: float64x1x3_t); - unsafe fn vst3q_f64(a: *mut f64, b: float64x2x3_t); - unsafe fn vst3q_lane_s8(a: *mut i8, b: int8x16x3_t); - unsafe fn vst3_lane_s64(a: *mut i64, b: int64x1x3_t); - unsafe fn vst3q_lane_s64(a: *mut i64, b: int64x2x3_t); - unsafe fn vst3q_lane_u8(a: *mut u8, b: uint8x16x3_t); - unsafe fn vst3_lane_u64(a: *mut u64, b: uint64x1x3_t); - unsafe fn vst3q_lane_u64(a: *mut u64, b: uint64x2x3_t); - unsafe fn vst3q_lane_p8(a: *mut p8, b: poly8x16x3_t); - unsafe fn vst3_lane_f64(a: *mut f64, b: float64x1x3_t); - unsafe fn vst3q_lane_f64(a: *mut f64, b: float64x2x3_t); - unsafe fn vst4q_s64(a: *mut i64, b: int64x2x4_t); - unsafe fn vst4q_u64(a: *mut u64, b: uint64x2x4_t); - unsafe fn vst4_f64(a: *mut f64, b: float64x1x4_t); - unsafe fn vst4q_f64(a: *mut f64, b: float64x2x4_t); - unsafe fn vst4q_lane_s8(a: *mut i8, b: int8x16x4_t); - unsafe fn vst4_lane_s64(a: *mut i64, b: int64x1x4_t); - unsafe fn vst4q_lane_s64(a: *mut i64, b: int64x2x4_t); - unsafe fn vst4q_lane_u8(a: *mut u8, b: uint8x16x4_t); - unsafe fn vst4_lane_u64(a: *mut u64, b: uint64x1x4_t); - unsafe fn vst4q_lane_u64(a: *mut u64, b: uint64x2x4_t); - unsafe fn vst4q_lane_p8(a: *mut p8, b: poly8x16x4_t); - unsafe 
fn vst4_lane_f64(a: *mut f64, b: float64x1x4_t); - unsafe fn vst4q_lane_f64(a: *mut f64, b: float64x2x4_t); - fn vmul_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t; - fn vmulq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t; - fn vmul_n_f64(a: float64x1_t, b: f64) -> float64x1_t; - fn vmulq_n_f64(a: float64x2_t, b: f64) -> float64x2_t; - fn vmul_lane_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t; - fn vmul_laneq_f64(a: float64x1_t, b: float64x2_t) -> float64x1_t; - fn vmulq_lane_f64(a: float64x2_t, b: float64x1_t) -> float64x2_t; - fn vmulq_laneq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t; - fn vmuls_lane_f32(a: f32, b: float32x2_t) -> f32; - fn vmuls_laneq_f32(a: f32, b: float32x4_t) -> f32; - fn vmuld_lane_f64(a: f64, b: float64x1_t) -> f64; - fn vmuld_laneq_f64(a: f64, b: float64x2_t) -> f64; - fn vmull_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t; - fn vmull_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t; - fn vmull_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t; - fn vmull_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t; - fn vmull_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t; - fn vmull_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t; - fn vmull_high_p8(a: poly8x16_t, b: poly8x16_t) -> poly16x8_t; - fn vmull_high_n_s16(a: int16x8_t, b: i16) -> int32x4_t; - fn vmull_high_n_s32(a: int32x4_t, b: i32) -> int64x2_t; - fn vmull_high_n_u16(a: uint16x8_t, b: u16) -> uint32x4_t; - fn vmull_high_n_u32(a: uint32x4_t, b: u32) -> uint64x2_t; - fn vmull_high_lane_s16(a: int16x8_t, b: int16x4_t) -> int32x4_t; - fn vmull_high_laneq_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t; - fn vmull_high_lane_s32(a: int32x4_t, b: int32x2_t) -> int64x2_t; - fn vmull_high_laneq_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t; - fn vmull_high_lane_u16(a: uint16x8_t, b: uint16x4_t) -> uint32x4_t; - fn vmull_high_laneq_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t; - fn vmull_high_lane_u32(a: uint32x4_t, b: uint32x2_t) -> uint64x2_t; - fn 
vmull_high_laneq_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t; - fn vmulx_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; - fn vmulxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; - fn vmulx_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t; - fn vmulxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t; - fn vmulx_lane_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t; - fn vmulx_laneq_f64(a: float64x1_t, b: float64x2_t) -> float64x1_t; - fn vmulx_lane_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; - fn vmulx_laneq_f32(a: float32x2_t, b: float32x4_t) -> float32x2_t; - fn vmulxq_lane_f32(a: float32x4_t, b: float32x2_t) -> float32x4_t; - fn vmulxq_laneq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; - fn vmulxq_lane_f64(a: float64x2_t, b: float64x1_t) -> float64x2_t; - fn vmulxq_laneq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t; - fn vmulxs_f32(a: f32, b: f32) -> f32; - fn vmulxd_f64(a: f64, b: f64) -> f64; - fn vmulxs_lane_f32(a: f32, b: float32x2_t) -> f32; - fn vmulxs_laneq_f32(a: f32, b: float32x4_t) -> f32; - fn vmulxd_lane_f64(a: f64, b: float64x1_t) -> f64; - fn vmulxd_laneq_f64(a: f64, b: float64x2_t) -> f64; - fn vfma_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t; - fn vfmaq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t; - fn vfma_n_f64(a: float64x1_t, b: float64x1_t, c: f64) -> float64x1_t; - fn vfmaq_n_f64(a: float64x2_t, b: float64x2_t, c: f64) -> float64x2_t; - fn vfma_lane_f32( - a: float32x2_t, - b: float32x2_t, - c: float32x2_t, - ) -> float32x2_t; - fn vfma_laneq_f32( - a: float32x2_t, - b: float32x2_t, - c: float32x4_t, - ) -> float32x2_t; - fn vfmaq_lane_f32( - a: float32x4_t, - b: float32x4_t, - c: float32x2_t, - ) -> float32x4_t; - fn vfmaq_laneq_f32( - a: float32x4_t, - b: float32x4_t, - c: float32x4_t, - ) -> float32x4_t; - fn vfma_lane_f64( - a: float64x1_t, - b: float64x1_t, - c: float64x1_t, - ) -> float64x1_t; - fn vfma_laneq_f64( - a: float64x1_t, - b: float64x1_t, 
- c: float64x2_t, - ) -> float64x1_t; - fn vfmaq_lane_f64( - a: float64x2_t, - b: float64x2_t, - c: float64x1_t, - ) -> float64x2_t; - fn vfmaq_laneq_f64( - a: float64x2_t, - b: float64x2_t, - c: float64x2_t, - ) -> float64x2_t; - fn vfmas_lane_f32(a: f32, b: f32, c: float32x2_t) -> f32; - fn vfmas_laneq_f32(a: f32, b: f32, c: float32x4_t) -> f32; - fn vfmad_lane_f64(a: f64, b: f64, c: float64x1_t) -> f64; - fn vfmad_laneq_f64(a: f64, b: f64, c: float64x2_t) -> f64; - fn vfms_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t; - fn vfmsq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t; - fn vfms_n_f64(a: float64x1_t, b: float64x1_t, c: f64) -> float64x1_t; - fn vfmsq_n_f64(a: float64x2_t, b: float64x2_t, c: f64) -> float64x2_t; - fn vfms_lane_f32( - a: float32x2_t, - b: float32x2_t, - c: float32x2_t, - ) -> float32x2_t; - fn vfms_laneq_f32( - a: float32x2_t, - b: float32x2_t, - c: float32x4_t, - ) -> float32x2_t; - fn vfmsq_lane_f32( - a: float32x4_t, - b: float32x4_t, - c: float32x2_t, - ) -> float32x4_t; - fn vfmsq_laneq_f32( - a: float32x4_t, - b: float32x4_t, - c: float32x4_t, - ) -> float32x4_t; - fn vfms_lane_f64( - a: float64x1_t, - b: float64x1_t, - c: float64x1_t, - ) -> float64x1_t; - fn vfms_laneq_f64( - a: float64x1_t, - b: float64x1_t, - c: float64x2_t, - ) -> float64x1_t; - fn vfmsq_lane_f64( - a: float64x2_t, - b: float64x2_t, - c: float64x1_t, - ) -> float64x2_t; - fn vfmsq_laneq_f64( - a: float64x2_t, - b: float64x2_t, - c: float64x2_t, - ) -> float64x2_t; - fn vfmss_lane_f32(a: f32, b: f32, c: float32x2_t) -> f32; - fn vfmss_laneq_f32(a: f32, b: f32, c: float32x4_t) -> f32; - fn vfmsd_lane_f64(a: f64, b: f64, c: float64x1_t) -> f64; - fn vfmsd_laneq_f64(a: f64, b: f64, c: float64x2_t) -> f64; - fn vdiv_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; - fn vdivq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; - fn vdiv_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t; - fn vdivq_f64(a: float64x2_t, b: 
float64x2_t) -> float64x2_t; - fn vsub_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t; - fn vsubq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t; - fn vsubd_s64(a: i64, b: i64) -> i64; - fn vsubd_u64(a: u64, b: u64) -> u64; - fn vaddv_f32(a: float32x2_t) -> f32; - fn vaddvq_f32(a: float32x4_t) -> f32; - fn vaddvq_f64(a: float64x2_t) -> f64; - fn vaddlv_s16(a: int16x4_t) -> i32; - fn vaddlvq_s16(a: int16x8_t) -> i32; - fn vaddlv_s32(a: int32x2_t) -> i64; - fn vaddlvq_s32(a: int32x4_t) -> i64; - fn vaddlv_u16(a: uint16x4_t) -> u32; - fn vaddlvq_u16(a: uint16x8_t) -> u32; - fn vaddlv_u32(a: uint32x2_t) -> u64; - fn vaddlvq_u32(a: uint32x4_t) -> u64; - fn vsubw_high_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t; - fn vsubw_high_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t; - fn vsubw_high_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t; - fn vsubw_high_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t; - fn vsubw_high_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t; - fn vsubw_high_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t; - fn vsubl_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t; - fn vsubl_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t; - fn vsubl_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t; - fn vsubl_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t; - fn vsubl_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t; - fn vsubl_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t; - fn vmax_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t; - fn vmaxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t; - fn vmaxnm_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t; - fn vmaxnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t; - fn vmaxnmv_f32(a: float32x2_t) -> f32; - fn vmaxnmvq_f64(a: float64x2_t) -> f64; - fn vmaxnmvq_f32(a: float32x4_t) -> f32; - fn vpmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; - fn vpmaxnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t; - fn vpmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; - 
fn vpmaxnms_f32(a: float32x2_t) -> f32; - fn vpmaxnmqd_f64(a: float64x2_t) -> f64; - fn vpmaxs_f32(a: float32x2_t) -> f32; - fn vpmaxqd_f64(a: float64x2_t) -> f64; - fn vmin_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t; - fn vminq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t; - fn vminnm_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t; - fn vminnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t; - fn vminnmv_f32(a: float32x2_t) -> f32; - fn vminnmvq_f64(a: float64x2_t) -> f64; - fn vminnmvq_f32(a: float32x4_t) -> f32; - fn vmovl_high_s8(a: int8x16_t) -> int16x8_t; - fn vmovl_high_s16(a: int16x8_t) -> int32x4_t; - fn vmovl_high_s32(a: int32x4_t) -> int64x2_t; - fn vmovl_high_u8(a: uint8x16_t) -> uint16x8_t; - fn vmovl_high_u16(a: uint16x8_t) -> uint32x4_t; - fn vmovl_high_u32(a: uint32x4_t) -> uint64x2_t; - fn vpaddq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; - fn vpaddq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t; - fn vpadds_f32(a: float32x2_t) -> f32; - fn vpaddd_f64(a: float64x2_t) -> f64; - fn vpminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; - fn vpminnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t; - fn vpminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; - fn vpminnms_f32(a: float32x2_t) -> f32; - fn vpminnmqd_f64(a: float64x2_t) -> f64; - fn vpmins_f32(a: float32x2_t) -> f32; - fn vpminqd_f64(a: float64x2_t) -> f64; - fn vqdmullh_s16(a: i16, b: i16) -> i32; - fn vqdmulls_s32(a: i32, b: i32) -> i64; - fn vqdmull_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t; - fn vqdmull_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t; - fn vqdmull_high_n_s16(a: int16x8_t, b: i16) -> int32x4_t; - fn vqdmull_high_n_s32(a: int32x4_t, b: i32) -> int64x2_t; - fn vqdmull_laneq_s16(a: int16x4_t, b: int16x8_t) -> int32x4_t; - fn vqdmull_laneq_s32(a: int32x2_t, b: int32x4_t) -> int64x2_t; - fn vqdmullh_lane_s16(a: i16, b: int16x4_t) -> i32; - fn vqdmullh_laneq_s16(a: i16, b: int16x8_t) -> i32; - fn 
vqdmulls_lane_s32(a: i32, b: int32x2_t) -> i64; - fn vqdmulls_laneq_s32(a: i32, b: int32x4_t) -> i64; - fn vqdmull_high_lane_s16(a: int16x8_t, b: int16x4_t) -> int32x4_t; - fn vqdmull_high_lane_s32(a: int32x4_t, b: int32x2_t) -> int64x2_t; - fn vqdmull_high_laneq_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t; - fn vqdmull_high_laneq_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t; - fn vqdmlal_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t; - fn vqdmlal_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t; - fn vqdmlal_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t; - fn vqdmlal_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t; - fn vqdmlal_laneq_s16(a: int32x4_t, b: int16x4_t, c: int16x8_t) -> int32x4_t; - fn vqdmlal_laneq_s32(a: int64x2_t, b: int32x2_t, c: int32x4_t) -> int64x2_t; - fn vqdmlal_high_lane_s16( - a: int32x4_t, - b: int16x8_t, - c: int16x4_t, - ) -> int32x4_t; - fn vqdmlal_high_laneq_s16( - a: int32x4_t, - b: int16x8_t, - c: int16x8_t, - ) -> int32x4_t; - fn vqdmlal_high_lane_s32( - a: int64x2_t, - b: int32x4_t, - c: int32x2_t, - ) -> int64x2_t; - fn vqdmlal_high_laneq_s32( - a: int64x2_t, - b: int32x4_t, - c: int32x4_t, - ) -> int64x2_t; - fn vqdmlalh_s16(a: i32, b: i16, c: i16) -> i32; - fn vqdmlals_s32(a: i64, b: i32, c: i32) -> i64; - fn vqdmlalh_lane_s16(a: i32, b: i16, c: int16x4_t) -> i32; - fn vqdmlalh_laneq_s16(a: i32, b: i16, c: int16x8_t) -> i32; - fn vqdmlals_lane_s32(a: i64, b: i32, c: int32x2_t) -> i64; - fn vqdmlals_laneq_s32(a: i64, b: i32, c: int32x4_t) -> i64; - fn vqdmlsl_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t; - fn vqdmlsl_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t; - fn vqdmlsl_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t; - fn vqdmlsl_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t; - fn vqdmlsl_laneq_s16(a: int32x4_t, b: int16x4_t, c: int16x8_t) -> int32x4_t; - fn vqdmlsl_laneq_s32(a: int64x2_t, b: int32x2_t, 
c: int32x4_t) -> int64x2_t; - fn vqdmlsl_high_lane_s16( - a: int32x4_t, - b: int16x8_t, - c: int16x4_t, - ) -> int32x4_t; - fn vqdmlsl_high_laneq_s16( - a: int32x4_t, - b: int16x8_t, - c: int16x8_t, - ) -> int32x4_t; - fn vqdmlsl_high_lane_s32( - a: int64x2_t, - b: int32x4_t, - c: int32x2_t, - ) -> int64x2_t; - fn vqdmlsl_high_laneq_s32( - a: int64x2_t, - b: int32x4_t, - c: int32x4_t, - ) -> int64x2_t; - fn vqdmlslh_s16(a: i32, b: i16, c: i16) -> i32; - fn vqdmlsls_s32(a: i64, b: i32, c: i32) -> i64; - fn vqdmlslh_lane_s16(a: i32, b: i16, c: int16x4_t) -> i32; - fn vqdmlslh_laneq_s16(a: i32, b: i16, c: int16x8_t) -> i32; - fn vqdmlsls_lane_s32(a: i64, b: i32, c: int32x2_t) -> i64; - fn vqdmlsls_laneq_s32(a: i64, b: i32, c: int32x4_t) -> i64; - fn vqdmulhh_s16(a: i16, b: i16) -> i16; - fn vqdmulhs_s32(a: i32, b: i32) -> i32; - fn vqdmulhh_lane_s16(a: i16, b: int16x4_t) -> i16; - fn vqdmulhh_laneq_s16(a: i16, b: int16x8_t) -> i16; - fn vqdmulhs_lane_s32(a: i32, b: int32x2_t) -> i32; - fn vqdmulhs_laneq_s32(a: i32, b: int32x4_t) -> i32; - fn vqdmulh_lane_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - fn vqdmulhq_lane_s16(a: int16x8_t, b: int16x4_t) -> int16x8_t; - fn vqdmulh_lane_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; - fn vqdmulhq_lane_s32(a: int32x4_t, b: int32x2_t) -> int32x4_t; - fn vqmovnh_s16(a: i16) -> i8; - fn vqmovns_s32(a: i32) -> i16; - fn vqmovnh_u16(a: u16) -> u8; - fn vqmovns_u32(a: u32) -> u16; - fn vqmovnd_s64(a: i64) -> i32; - fn vqmovnd_u64(a: u64) -> u32; - fn vqmovn_high_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t; - fn vqmovn_high_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t; - fn vqmovn_high_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t; - fn vqmovn_high_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t; - fn vqmovn_high_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t; - fn vqmovn_high_u64(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t; - fn vqmovunh_s16(a: i16) -> u8; - fn vqmovuns_s32(a: i32) -> u16; - fn vqmovund_s64(a: i64) -> u32; - fn 
vqmovun_high_s16(a: uint8x8_t, b: int16x8_t) -> uint8x16_t; - fn vqmovun_high_s32(a: uint16x4_t, b: int32x4_t) -> uint16x8_t; - fn vqmovun_high_s64(a: uint32x2_t, b: int64x2_t) -> uint32x4_t; - fn vqrdmulhh_s16(a: i16, b: i16) -> i16; - fn vqrdmulhs_s32(a: i32, b: i32) -> i32; - fn vqrdmulhh_lane_s16(a: i16, b: int16x4_t) -> i16; - fn vqrdmulhh_laneq_s16(a: i16, b: int16x8_t) -> i16; - fn vqrdmulhs_lane_s32(a: i32, b: int32x2_t) -> i32; - fn vqrdmulhs_laneq_s32(a: i32, b: int32x4_t) -> i32; - fn vqrdmlah_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t; - fn vqrdmlahq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t; - fn vqrdmlah_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t; - fn vqrdmlahq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t; - fn vqrdmlahh_s16(a: i16, b: i16, c: i16) -> i16; - fn vqrdmlahs_s32(a: i32, b: i32, c: i32) -> i32; - fn vqrdmlah_lane_s16( - a: int16x4_t, - b: int16x4_t, - c: int16x4_t, - ) -> int16x4_t; - fn vqrdmlah_laneq_s16( - a: int16x4_t, - b: int16x4_t, - c: int16x8_t, - ) -> int16x4_t; - fn vqrdmlahq_lane_s16( - a: int16x8_t, - b: int16x8_t, - c: int16x4_t, - ) -> int16x8_t; - fn vqrdmlahq_laneq_s16( - a: int16x8_t, - b: int16x8_t, - c: int16x8_t, - ) -> int16x8_t; - fn vqrdmlah_lane_s32( - a: int32x2_t, - b: int32x2_t, - c: int32x2_t, - ) -> int32x2_t; - fn vqrdmlah_laneq_s32( - a: int32x2_t, - b: int32x2_t, - c: int32x4_t, - ) -> int32x2_t; - fn vqrdmlahq_lane_s32( - a: int32x4_t, - b: int32x4_t, - c: int32x2_t, - ) -> int32x4_t; - fn vqrdmlahq_laneq_s32( - a: int32x4_t, - b: int32x4_t, - c: int32x4_t, - ) -> int32x4_t; - fn vqrdmlahh_lane_s16(a: i16, b: i16, c: int16x4_t) -> i16; - fn vqrdmlahh_laneq_s16(a: i16, b: i16, c: int16x8_t) -> i16; - fn vqrdmlahs_lane_s32(a: i32, b: i32, c: int32x2_t) -> i32; - fn vqrdmlahs_laneq_s32(a: i32, b: i32, c: int32x4_t) -> i32; - fn vqrdmlsh_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t; - fn vqrdmlshq_s16(a: int16x8_t, b: int16x8_t, c: 
int16x8_t) -> int16x8_t; - fn vqrdmlsh_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t; - fn vqrdmlshq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t; - fn vqrdmlshh_s16(a: i16, b: i16, c: i16) -> i16; - fn vqrdmlshs_s32(a: i32, b: i32, c: i32) -> i32; - fn vqrdmlsh_lane_s16( - a: int16x4_t, - b: int16x4_t, - c: int16x4_t, - ) -> int16x4_t; - fn vqrdmlsh_laneq_s16( - a: int16x4_t, - b: int16x4_t, - c: int16x8_t, - ) -> int16x4_t; - fn vqrdmlshq_lane_s16( - a: int16x8_t, - b: int16x8_t, - c: int16x4_t, - ) -> int16x8_t; - fn vqrdmlshq_laneq_s16( - a: int16x8_t, - b: int16x8_t, - c: int16x8_t, - ) -> int16x8_t; - fn vqrdmlsh_lane_s32( - a: int32x2_t, - b: int32x2_t, - c: int32x2_t, - ) -> int32x2_t; - fn vqrdmlsh_laneq_s32( - a: int32x2_t, - b: int32x2_t, - c: int32x4_t, - ) -> int32x2_t; - fn vqrdmlshq_lane_s32( - a: int32x4_t, - b: int32x4_t, - c: int32x2_t, - ) -> int32x4_t; - fn vqrdmlshq_laneq_s32( - a: int32x4_t, - b: int32x4_t, - c: int32x4_t, - ) -> int32x4_t; - fn vqrdmlshh_lane_s16(a: i16, b: i16, c: int16x4_t) -> i16; - fn vqrdmlshh_laneq_s16(a: i16, b: i16, c: int16x8_t) -> i16; - fn vqrdmlshs_lane_s32(a: i32, b: i32, c: int32x2_t) -> i32; - fn vqrdmlshs_laneq_s32(a: i32, b: i32, c: int32x4_t) -> i32; - fn vqrshls_s32(a: i32, b: i32) -> i32; - fn vqrshld_s64(a: i64, b: i64) -> i64; - fn vqrshlb_s8(a: i8, b: i8) -> i8; - fn vqrshlh_s16(a: i16, b: i16) -> i16; - fn vqrshls_u32(a: u32, b: i32) -> u32; - fn vqrshld_u64(a: u64, b: i64) -> u64; - fn vqrshlb_u8(a: u8, b: i8) -> u8; - fn vqrshlh_u16(a: u16, b: i16) -> u16; - fn vqrshrnh_n_s16(a: i16) -> i8; - fn vqrshrns_n_s32(a: i32) -> i16; - fn vqrshrnd_n_s64(a: i64) -> i32; - fn vqrshrn_high_n_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t; - fn vqrshrn_high_n_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t; - fn vqrshrn_high_n_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t; - fn vqrshrnh_n_u16(a: u16) -> u8; - fn vqrshrns_n_u32(a: u32) -> u16; - fn vqrshrnd_n_u64(a: u64) -> u32; - fn 
vqrshrn_high_n_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t; - fn vqrshrn_high_n_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t; - fn vqrshrn_high_n_u64(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t; - fn vqrshrunh_n_s16(a: i16) -> u8; - fn vqrshruns_n_s32(a: i32) -> u16; - fn vqrshrund_n_s64(a: i64) -> u32; - fn vqrshrun_high_n_s16(a: uint8x8_t, b: int16x8_t) -> uint8x16_t; - fn vqrshrun_high_n_s32(a: uint16x4_t, b: int32x4_t) -> uint16x8_t; - fn vqrshrun_high_n_s64(a: uint32x2_t, b: int64x2_t) -> uint32x4_t; - fn vqshld_s64(a: i64, b: i64) -> i64; - fn vqshlb_s8(a: i8, b: i8) -> i8; - fn vqshlh_s16(a: i16, b: i16) -> i16; - fn vqshls_s32(a: i32, b: i32) -> i32; - fn vqshld_u64(a: u64, b: i64) -> u64; - fn vqshlb_u8(a: u8, b: i8) -> u8; - fn vqshlh_u16(a: u16, b: i16) -> u16; - fn vqshls_u32(a: u32, b: i32) -> u32; - fn vqshlb_n_s8(a: i8) -> i8; - fn vqshlh_n_s16(a: i16) -> i16; - fn vqshls_n_s32(a: i32) -> i32; - fn vqshld_n_s64(a: i64) -> i64; - fn vqshlb_n_u8(a: u8) -> u8; - fn vqshlh_n_u16(a: u16) -> u16; - fn vqshls_n_u32(a: u32) -> u32; - fn vqshld_n_u64(a: u64) -> u64; - fn vqshlub_n_s8(a: i8) -> u8; - fn vqshluh_n_s16(a: i16) -> u16; - fn vqshlus_n_s32(a: i32) -> u32; - fn vqshlud_n_s64(a: i64) -> u64; - fn vqshrnd_n_s64(a: i64) -> i32; - fn vqshrnh_n_s16(a: i16) -> i8; - fn vqshrns_n_s32(a: i32) -> i16; - fn vqshrn_high_n_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t; - fn vqshrn_high_n_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t; - fn vqshrn_high_n_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t; - fn vqshrnd_n_u64(a: u64) -> u32; - fn vqshrnh_n_u16(a: u16) -> u8; - fn vqshrns_n_u32(a: u32) -> u16; - fn vqshrn_high_n_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t; - fn vqshrn_high_n_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t; - fn vqshrn_high_n_u64(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t; - fn vqshrunh_n_s16(a: i16) -> u8; - fn vqshruns_n_s32(a: i32) -> u16; - fn vqshrund_n_s64(a: i64) -> u32; - fn vqshrun_high_n_s16(a: uint8x8_t, b: int16x8_t) -> 
uint8x16_t; - fn vqshrun_high_n_s32(a: uint16x4_t, b: int32x4_t) -> uint16x8_t; - fn vqshrun_high_n_s64(a: uint32x2_t, b: int64x2_t) -> uint32x4_t; - fn vsqaddb_u8(a: u8, b: i8) -> u8; - fn vsqaddh_u16(a: u16, b: i16) -> u16; - fn vsqadds_u32(a: u32, b: i32) -> u32; - fn vsqaddd_u64(a: u64, b: i64) -> u64; - fn vsqrt_f32(a: float32x2_t) -> float32x2_t; - fn vsqrtq_f32(a: float32x4_t) -> float32x4_t; - fn vsqrt_f64(a: float64x1_t) -> float64x1_t; - fn vsqrtq_f64(a: float64x2_t) -> float64x2_t; - fn vrsqrte_f64(a: float64x1_t) -> float64x1_t; - fn vrsqrteq_f64(a: float64x2_t) -> float64x2_t; - fn vrsqrtes_f32(a: f32) -> f32; - fn vrsqrted_f64(a: f64) -> f64; - fn vrsqrts_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t; - fn vrsqrtsq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t; - fn vrsqrtss_f32(a: f32, b: f32) -> f32; - fn vrsqrtsd_f64(a: f64, b: f64) -> f64; - fn vrecpe_f64(a: float64x1_t) -> float64x1_t; - fn vrecpeq_f64(a: float64x2_t) -> float64x2_t; - fn vrecpes_f32(a: f32) -> f32; - fn vrecped_f64(a: f64) -> f64; - fn vrecps_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t; - fn vrecpsq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t; - fn vrecpss_f32(a: f32, b: f32) -> f32; - fn vrecpsd_f64(a: f64, b: f64) -> f64; - fn vrecpxs_f32(a: f32) -> f32; - fn vrecpxd_f64(a: f64) -> f64; - fn vreinterpret_s64_p64(a: poly64x1_t) -> int64x1_t; - fn vreinterpret_u64_p64(a: poly64x1_t) -> uint64x1_t; - fn vreinterpret_p64_s64(a: int64x1_t) -> poly64x1_t; - fn vreinterpret_p64_u64(a: uint64x1_t) -> poly64x1_t; - fn vreinterpretq_s64_p64(a: poly64x2_t) -> int64x2_t; - fn vreinterpretq_u64_p64(a: poly64x2_t) -> uint64x2_t; - fn vreinterpretq_p64_s64(a: int64x2_t) -> poly64x2_t; - fn vreinterpretq_p64_u64(a: uint64x2_t) -> poly64x2_t; - fn vreinterpret_s8_f64(a: float64x1_t) -> int8x8_t; - fn vreinterpret_s16_f64(a: float64x1_t) -> int16x4_t; - fn vreinterpret_s32_f64(a: float64x1_t) -> int32x2_t; - fn vreinterpret_s64_f64(a: float64x1_t) -> int64x1_t; - fn 
vreinterpretq_s8_f64(a: float64x2_t) -> int8x16_t; - fn vreinterpretq_s16_f64(a: float64x2_t) -> int16x8_t; - fn vreinterpretq_s32_f64(a: float64x2_t) -> int32x4_t; - fn vreinterpretq_s64_f64(a: float64x2_t) -> int64x2_t; - fn vreinterpret_u8_f64(a: float64x1_t) -> uint8x8_t; - fn vreinterpret_u16_f64(a: float64x1_t) -> uint16x4_t; - fn vreinterpret_u32_f64(a: float64x1_t) -> uint32x2_t; - fn vreinterpret_u64_f64(a: float64x1_t) -> uint64x1_t; - fn vreinterpretq_u8_f64(a: float64x2_t) -> uint8x16_t; - fn vreinterpretq_u16_f64(a: float64x2_t) -> uint16x8_t; - fn vreinterpretq_u32_f64(a: float64x2_t) -> uint32x4_t; - fn vreinterpretq_u64_f64(a: float64x2_t) -> uint64x2_t; - fn vreinterpret_p8_f64(a: float64x1_t) -> poly8x8_t; - fn vreinterpret_p16_f64(a: float64x1_t) -> poly16x4_t; - fn vreinterpret_p64_f32(a: float32x2_t) -> poly64x1_t; - fn vreinterpret_p64_f64(a: float64x1_t) -> poly64x1_t; - fn vreinterpretq_p8_f64(a: float64x2_t) -> poly8x16_t; - fn vreinterpretq_p16_f64(a: float64x2_t) -> poly16x8_t; - fn vreinterpretq_p64_f32(a: float32x4_t) -> poly64x2_t; - fn vreinterpretq_p64_f64(a: float64x2_t) -> poly64x2_t; - fn vreinterpretq_p128_f64(a: float64x2_t) -> p128; - fn vreinterpret_f64_s8(a: int8x8_t) -> float64x1_t; - fn vreinterpret_f64_s16(a: int16x4_t) -> float64x1_t; - fn vreinterpret_f64_s32(a: int32x2_t) -> float64x1_t; - fn vreinterpret_f64_s64(a: int64x1_t) -> float64x1_t; - fn vreinterpretq_f64_s8(a: int8x16_t) -> float64x2_t; - fn vreinterpretq_f64_s16(a: int16x8_t) -> float64x2_t; - fn vreinterpretq_f64_s32(a: int32x4_t) -> float64x2_t; - fn vreinterpretq_f64_s64(a: int64x2_t) -> float64x2_t; - fn vreinterpret_f64_p8(a: poly8x8_t) -> float64x1_t; - fn vreinterpret_f64_u16(a: uint16x4_t) -> float64x1_t; - fn vreinterpret_f64_u32(a: uint32x2_t) -> float64x1_t; - fn vreinterpret_f64_u64(a: uint64x1_t) -> float64x1_t; - fn vreinterpretq_f64_p8(a: poly8x16_t) -> float64x2_t; - fn vreinterpretq_f64_u16(a: uint16x8_t) -> float64x2_t; - fn 
vreinterpretq_f64_u32(a: uint32x4_t) -> float64x2_t; - fn vreinterpretq_f64_u64(a: uint64x2_t) -> float64x2_t; - fn vreinterpret_f64_u8(a: uint8x8_t) -> float64x1_t; - fn vreinterpret_f64_p16(a: poly16x4_t) -> float64x1_t; - fn vreinterpret_f64_p64(a: poly64x1_t) -> float64x1_t; - fn vreinterpret_f32_p64(a: poly64x1_t) -> float32x2_t; - fn vreinterpretq_f64_u8(a: uint8x16_t) -> float64x2_t; - fn vreinterpretq_f64_p16(a: poly16x8_t) -> float64x2_t; - fn vreinterpretq_f64_p64(a: poly64x2_t) -> float64x2_t; - fn vreinterpretq_f32_p64(a: poly64x2_t) -> float32x4_t; - fn vreinterpretq_f64_p128(a: p128) -> float64x2_t; - fn vreinterpret_f64_f32(a: float32x2_t) -> float64x1_t; - fn vreinterpret_f32_f64(a: float64x1_t) -> float32x2_t; - fn vreinterpretq_f64_f32(a: float32x4_t) -> float64x2_t; - fn vreinterpretq_f32_f64(a: float64x2_t) -> float32x4_t; - fn vrshld_s64(a: i64, b: i64) -> i64; - fn vrshld_u64(a: u64, b: i64) -> u64; - fn vrshrd_n_s64(a: i64) -> i64; - fn vrshrd_n_u64(a: u64) -> u64; - fn vrshrn_high_n_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t; - fn vrshrn_high_n_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t; - fn vrshrn_high_n_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t; - fn vrshrn_high_n_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t; - fn vrshrn_high_n_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t; - fn vrshrn_high_n_u64(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t; - fn vrsrad_n_s64(a: i64, b: i64) -> i64; - fn vrsrad_n_u64(a: u64, b: u64) -> u64; - fn vrsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t; - fn vrsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t; - fn vrsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t; - fn vrsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t; - fn vrsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t; - fn vrsubhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t; - fn vset_lane_f64(a: f64, b: 
float64x1_t) -> float64x1_t; - fn vsetq_lane_f64(a: f64, b: float64x2_t) -> float64x2_t; - fn vshld_s64(a: i64, b: i64) -> i64; - fn vshld_u64(a: u64, b: i64) -> u64; - fn vshll_high_n_s8(a: int8x16_t) -> int16x8_t; - fn vshll_high_n_s16(a: int16x8_t) -> int32x4_t; - fn vshll_high_n_s32(a: int32x4_t) -> int64x2_t; - fn vshll_high_n_u8(a: uint8x16_t) -> uint16x8_t; - fn vshll_high_n_u16(a: uint16x8_t) -> uint32x4_t; - fn vshll_high_n_u32(a: uint32x4_t) -> uint64x2_t; - fn vshrn_high_n_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t; - fn vshrn_high_n_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t; - fn vshrn_high_n_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t; - fn vshrn_high_n_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t; - fn vshrn_high_n_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t; - fn vshrn_high_n_u64(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t; - fn vtrn1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; - fn vtrn1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; - fn vtrn1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - fn vtrn1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; - fn vtrn1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; - fn vtrn1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; - fn vtrn1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; - fn vtrn1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; - fn vtrn1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; - fn vtrn1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; - fn vtrn1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t; - fn vtrn1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t; - fn vtrn1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t; - fn vtrn1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t; - fn vtrn1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; - fn vtrn1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t; - fn vtrn1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; - fn vtrn1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t; - fn vtrn1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t; - fn vtrn1q_f32(a: 
float32x4_t, b: float32x4_t) -> float32x4_t; - fn vtrn1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; - fn vtrn1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t; - fn vtrn2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; - fn vtrn2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; - fn vtrn2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - fn vtrn2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; - fn vtrn2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; - fn vtrn2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; - fn vtrn2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; - fn vtrn2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; - fn vtrn2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; - fn vtrn2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; - fn vtrn2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t; - fn vtrn2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t; - fn vtrn2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t; - fn vtrn2q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t; - fn vtrn2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; - fn vtrn2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t; - fn vtrn2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; - fn vtrn2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t; - fn vtrn2q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t; - fn vtrn2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; - fn vtrn2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; - fn vtrn2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t; - fn vzip1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; - fn vzip1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; - fn vzip1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - fn vzip1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; - fn vzip1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; - fn vzip1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; - fn vzip1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t; - fn vzip1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; - fn vzip1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; - fn 
vzip1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; - fn vzip1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; - fn vzip1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; - fn vzip1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; - fn vzip1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t; - fn vzip1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t; - fn vzip1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t; - fn vzip1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t; - fn vzip1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t; - fn vzip1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t; - fn vzip1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; - fn vzip1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; - fn vzip1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t; - fn vzip2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; - fn vzip2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; - fn vzip2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - fn vzip2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; - fn vzip2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; - fn vzip2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; - fn vzip2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t; - fn vzip2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; - fn vzip2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; - fn vzip2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; - fn vzip2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; - fn vzip2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; - fn vzip2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; - fn vzip2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t; - fn vzip2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t; - fn vzip2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t; - fn vzip2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t; - fn vzip2q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t; - fn vzip2q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t; - fn vzip2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; - fn vzip2q_f32(a: float32x4_t, b: 
float32x4_t) -> float32x4_t; - fn vzip2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t; - fn vuzp1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; - fn vuzp1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; - fn vuzp1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - fn vuzp1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; - fn vuzp1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; - fn vuzp1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; - fn vuzp1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; - fn vuzp1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; - fn vuzp1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; - fn vuzp1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; - fn vuzp1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t; - fn vuzp1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t; - fn vuzp1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t; - fn vuzp1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t; - fn vuzp1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; - fn vuzp1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t; - fn vuzp1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; - fn vuzp1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t; - fn vuzp1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t; - fn vuzp1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; - fn vuzp1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; - fn vuzp1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t; - fn vuzp2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; - fn vuzp2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; - fn vuzp2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - fn vuzp2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; - fn vuzp2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; - fn vuzp2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; - fn vuzp2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; - fn vuzp2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; - fn vuzp2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; - fn vuzp2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; - fn vuzp2_p8(a: 
poly8x8_t, b: poly8x8_t) -> poly8x8_t; - fn vuzp2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t; - fn vuzp2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t; - fn vuzp2q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t; - fn vuzp2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; - fn vuzp2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t; - fn vuzp2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; - fn vuzp2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t; - fn vuzp2q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t; - fn vuzp2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; - fn vuzp2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; - fn vuzp2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t; - fn vabal_high_u8(a: uint16x8_t, b: uint8x16_t, c: uint8x16_t) -> uint16x8_t; - fn vabal_high_u16(a: uint32x4_t, b: uint16x8_t, c: uint16x8_t) -> uint32x4_t; - fn vabal_high_u32(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uint64x2_t; - fn vabal_high_s8(a: int16x8_t, b: int8x16_t, c: int8x16_t) -> int16x8_t; - fn vabal_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t; - fn vabal_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t; - fn vqabs_s64(a: int64x1_t) -> int64x1_t; - fn vqabsq_s64(a: int64x2_t) -> int64x2_t; - fn vqabsb_s8(a: i8) -> i8; - fn vqabsh_s16(a: i16) -> i16; - fn vqabss_s32(a: i32) -> i32; - fn vqabsd_s64(a: i64) -> i64; - fn vslid_n_s64(a: i64, b: i64) -> i64; - fn vslid_n_u64(a: u64, b: u64) -> u64; - fn vsrid_n_s64(a: i64, b: i64) -> i64; - fn vsrid_n_u64(a: u64, b: u64) -> u64; - - fn vcopy_lane_s64(_a: int64x1_t, b: int64x1_t) -> int64x1_t; - fn vcopy_lane_u64( - _a: uint64x1_t, - b: uint64x1_t, - ) -> uint64x1_t; - fn vcopy_lane_p64( - _a: poly64x1_t, - b: poly64x1_t, - ) -> poly64x1_t; - fn vcopy_lane_f64( - _a: float64x1_t, - b: float64x1_t, - ) -> float64x1_t; - fn vcopy_laneq_s64( - _a: int64x1_t, - b: int64x2_t, - ) -> int64x1_t; - fn vcopy_laneq_u64( - _a: uint64x1_t, - b: uint64x2_t, - ) -> uint64x1_t; - fn 
vcopy_laneq_p64( - _a: poly64x1_t, - b: poly64x2_t, - ) -> poly64x1_t; - fn vcopy_laneq_f64( - _a: float64x1_t, - b: float64x2_t, - ) -> float64x1_t; - unsafe fn vld1_s8(ptr: *const i8) -> int8x8_t; - unsafe fn vld1q_s8(ptr: *const i8) -> int8x16_t; - unsafe fn vld1_s16(ptr: *const i16) -> int16x4_t; - unsafe fn vld1q_s16(ptr: *const i16) -> int16x8_t; - unsafe fn vld1_s32(ptr: *const i32) -> int32x2_t; - unsafe fn vld1q_s32(ptr: *const i32) -> int32x4_t; - unsafe fn vld1_s64(ptr: *const i64) -> int64x1_t; - unsafe fn vld1q_s64(ptr: *const i64) -> int64x2_t; - unsafe fn vld1_u8(ptr: *const u8) -> uint8x8_t; - unsafe fn vld1q_u8(ptr: *const u8) -> uint8x16_t; - unsafe fn vld1_u16(ptr: *const u16) -> uint16x4_t; - unsafe fn vld1q_u16(ptr: *const u16) -> uint16x8_t; - unsafe fn vld1_u32(ptr: *const u32) -> uint32x2_t; - unsafe fn vld1q_u32(ptr: *const u32) -> uint32x4_t; - unsafe fn vld1_u64(ptr: *const u64) -> uint64x1_t; - unsafe fn vld1q_u64(ptr: *const u64) -> uint64x2_t; - unsafe fn vld1_p8(ptr: *const p8) -> poly8x8_t; - unsafe fn vld1q_p8(ptr: *const p8) -> poly8x16_t; - unsafe fn vld1_p16(ptr: *const p16) -> poly16x4_t; - unsafe fn vld1q_p16(ptr: *const p16) -> poly16x8_t; - unsafe fn vld1_f32(ptr: *const f32) -> float32x2_t; - unsafe fn vld1q_f32(ptr: *const f32) -> float32x4_t; - unsafe fn vld1_f64(ptr: *const f64) -> float64x1_t; - unsafe fn vld1q_f64(ptr: *const f64) -> float64x2_t; - unsafe fn vld1_dup_f64(ptr: *const f64) -> float64x1_t; - unsafe fn vld1q_dup_f64(ptr: *const f64) -> float64x2_t; - unsafe fn vld1_lane_f64(ptr: *const f64, src: float64x1_t) -> float64x1_t; - unsafe fn vld1q_lane_f64(ptr: *const f64, src: float64x2_t) - -> float64x2_t; - unsafe fn vst1_s8(ptr: *mut i8, a: int8x8_t); - unsafe fn vst1q_s8(ptr: *mut i8, a: int8x16_t); - unsafe fn vst1_s16(ptr: *mut i16, a: int16x4_t); - unsafe fn vst1q_s16(ptr: *mut i16, a: int16x8_t); - unsafe fn vst1_s32(ptr: *mut i32, a: int32x2_t); - unsafe fn vst1q_s32(ptr: *mut i32, a: int32x4_t); - 
unsafe fn vst1_s64(ptr: *mut i64, a: int64x1_t); - unsafe fn vst1q_s64(ptr: *mut i64, a: int64x2_t); - unsafe fn vst1_u8(ptr: *mut u8, a: uint8x8_t); - unsafe fn vst1q_u8(ptr: *mut u8, a: uint8x16_t); - unsafe fn vst1_u16(ptr: *mut u16, a: uint16x4_t); - unsafe fn vst1q_u16(ptr: *mut u16, a: uint16x8_t); - unsafe fn vst1_u32(ptr: *mut u32, a: uint32x2_t); - unsafe fn vst1q_u32(ptr: *mut u32, a: uint32x4_t); - unsafe fn vst1_u64(ptr: *mut u64, a: uint64x1_t); - unsafe fn vst1q_u64(ptr: *mut u64, a: uint64x2_t); - unsafe fn vst1_p8(ptr: *mut p8, a: poly8x8_t); - unsafe fn vst1q_p8(ptr: *mut p8, a: poly8x16_t); - unsafe fn vst1_p16(ptr: *mut p16, a: poly16x4_t); - unsafe fn vst1q_p16(ptr: *mut p16, a: poly16x8_t); - unsafe fn vst1_f32(ptr: *mut f32, a: float32x2_t); - unsafe fn vst1q_f32(ptr: *mut f32, a: float32x4_t); - unsafe fn vst1_f64(ptr: *mut f64, a: float64x1_t); - unsafe fn vst1q_f64(ptr: *mut f64, a: float64x2_t); - fn vabsd_s64(a: i64) -> i64; - fn vabs_s64(a: int64x1_t) -> int64x1_t; - fn vabsq_s64(a: int64x2_t) -> int64x2_t; - fn vbsl_f64(a: uint64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t; - fn vbsl_p64(a: poly64x1_t, b: poly64x1_t, c: poly64x1_t) -> poly64x1_t; - fn vbslq_f64(a: uint64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t; - fn vbslq_p64(a: poly64x2_t, b: poly64x2_t, c: poly64x2_t) -> poly64x2_t; - fn vuqadd_s8(a: int8x8_t, b: uint8x8_t) -> int8x8_t; - fn vuqaddq_s8(a: int8x16_t, b: uint8x16_t) -> int8x16_t; - fn vuqadd_s16(a: int16x4_t, b: uint16x4_t) -> int16x4_t; - fn vuqaddq_s16(a: int16x8_t, b: uint16x8_t) -> int16x8_t; - fn vuqadd_s32(a: int32x2_t, b: uint32x2_t) -> int32x2_t; - fn vuqaddq_s32(a: int32x4_t, b: uint32x4_t) -> int32x4_t; - fn vuqadd_s64(a: int64x1_t, b: uint64x1_t) -> int64x1_t; - fn vuqaddq_s64(a: int64x2_t, b: uint64x2_t) -> int64x2_t; - fn vsqadd_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t; - fn vsqaddq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t; - fn vsqadd_u16(a: uint16x4_t, b: int16x4_t) -> 
uint16x4_t; - fn vsqaddq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t; - fn vsqadd_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t; - fn vsqaddq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t; - fn vsqadd_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t; - fn vsqaddq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t; - fn vpaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; - fn vpaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; - fn vpaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; - fn vpaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; - fn vpaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t; - fn vpaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t; - fn vpaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; - fn vpaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; - fn vpaddd_s64(a: int64x2_t) -> i64; - fn vpaddd_u64(a: uint64x2_t) -> u64; - fn vaddv_s16(a: int16x4_t) -> i16; - fn vaddv_s32(a: int32x2_t) -> i32; - fn vaddv_s8(a: int8x8_t) -> i8; - fn vaddv_u16(a: uint16x4_t) -> u16; - fn vaddv_u32(a: uint32x2_t) -> u32; - fn vaddv_u8(a: uint8x8_t) -> u8; - fn vaddvq_s16(a: int16x8_t) -> i16; - fn vaddvq_s32(a: int32x4_t) -> i32; - fn vaddvq_s8(a: int8x16_t) -> i8; - fn vaddvq_u16(a: uint16x8_t) -> u16; - fn vaddvq_u32(a: uint32x4_t) -> u32; - fn vaddvq_u8(a: uint8x16_t) -> u8; - fn vaddvq_s64(a: int64x2_t) -> i64; - fn vaddvq_u64(a: uint64x2_t) -> u64; - fn vaddlv_s8(a: int8x8_t) -> i16; - fn vaddlvq_s8(a: int8x16_t) -> i16; - fn vaddlv_u8(a: uint8x8_t) -> u16; - fn vaddlvq_u8(a: uint8x16_t) -> u16; - fn vadd_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t; - fn vaddq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t; - fn vadd_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t; - fn vadd_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t; - fn vaddd_s64(a: i64, b: i64) -> i64; - fn vaddd_u64(a: u64, b: u64) -> u64; - fn vmaxv_s8(a: int8x8_t) -> i8; - fn vmaxvq_s8(a: int8x16_t) -> i8; - fn vmaxv_s16(a: int16x4_t) -> i16; - fn vmaxvq_s16(a: int16x8_t) -> 
i16; - fn vmaxv_s32(a: int32x2_t) -> i32; - fn vmaxvq_s32(a: int32x4_t) -> i32; - fn vmaxv_u8(a: uint8x8_t) -> u8; - fn vmaxvq_u8(a: uint8x16_t) -> u8; - fn vmaxv_u16(a: uint16x4_t) -> u16; - fn vmaxvq_u16(a: uint16x8_t) -> u16; - fn vmaxv_u32(a: uint32x2_t) -> u32; - fn vmaxvq_u32(a: uint32x4_t) -> u32; - fn vmaxv_f32(a: float32x2_t) -> f32; - fn vmaxvq_f32(a: float32x4_t) -> f32; - fn vmaxvq_f64(a: float64x2_t) -> f64; - fn vminv_s8(a: int8x8_t) -> i8; - fn vminvq_s8(a: int8x16_t) -> i8; - fn vminv_s16(a: int16x4_t) -> i16; - fn vminvq_s16(a: int16x8_t) -> i16; - fn vminv_s32(a: int32x2_t) -> i32; - fn vminvq_s32(a: int32x4_t) -> i32; - fn vminv_u8(a: uint8x8_t) -> u8; - fn vminvq_u8(a: uint8x16_t) -> u8; - fn vminv_u16(a: uint16x4_t) -> u16; - fn vminvq_u16(a: uint16x8_t) -> u16; - fn vminv_u32(a: uint32x2_t) -> u32; - fn vminvq_u32(a: uint32x4_t) -> u32; - fn vminv_f32(a: float32x2_t) -> f32; - fn vminvq_f32(a: float32x4_t) -> f32; - fn vminvq_f64(a: float64x2_t) -> f64; - fn vpminq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; - fn vpminq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; - fn vpminq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; - fn vpminq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; - fn vpminq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; - fn vpminq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; - fn vpminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; - fn vpminq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t; - fn vpmaxq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; - fn vpmaxq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; - fn vpmaxq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; - fn vpmaxq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; - fn vpmaxq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; - fn vpmaxq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; - fn vpmaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; - fn vpmaxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t; - fn vext_p64(a: poly64x1_t, _b: 
poly64x1_t) -> poly64x1_t; - fn vext_f64(a: float64x1_t, _b: float64x1_t) -> float64x1_t; - fn vdup_n_p64(value: p64) -> poly64x1_t; - fn vdup_n_f64(value: f64) -> float64x1_t; - fn vdupq_n_p64(value: p64) -> poly64x2_t; - fn vdupq_n_f64(value: f64) -> float64x2_t; - fn vmov_n_p64(value: p64) -> poly64x1_t; - fn vmov_n_f64(value: f64) -> float64x1_t; - fn vmovq_n_p64(value: p64) -> poly64x2_t; - fn vmovq_n_f64(value: f64) -> float64x2_t; - fn vget_high_f64(a: float64x2_t) -> float64x1_t; - fn vget_high_p64(a: poly64x2_t) -> poly64x1_t; - fn vget_low_f64(a: float64x2_t) -> float64x1_t; - fn vget_low_p64(a: poly64x2_t) -> poly64x1_t; - fn vget_lane_f64(v: float64x1_t) -> f64; - fn vgetq_lane_f64(v: float64x2_t) -> f64; - fn vcombine_f64(low: float64x1_t, high: float64x1_t) -> float64x2_t; - fn vtbl1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; - fn vtbl1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; - fn vtbl1_p8(a: poly8x8_t, b: uint8x8_t) -> poly8x8_t; - fn vtbl2_s8(a: int8x8x2_t, b: int8x8_t) -> int8x8_t; - fn vtbl2_u8(a: uint8x8x2_t, b: uint8x8_t) -> uint8x8_t; - fn vtbl2_p8(a: poly8x8x2_t, b: uint8x8_t) -> poly8x8_t; - fn vtbl3_s8(a: int8x8x3_t, b: int8x8_t) -> int8x8_t; - fn vtbl3_u8(a: uint8x8x3_t, b: uint8x8_t) -> uint8x8_t; - fn vtbl3_p8(a: poly8x8x3_t, b: uint8x8_t) -> poly8x8_t; - fn vtbl4_s8(a: int8x8x4_t, b: int8x8_t) -> int8x8_t; - fn vtbl4_u8(a: uint8x8x4_t, b: uint8x8_t) -> uint8x8_t; - fn vtbl4_p8(a: poly8x8x4_t, b: uint8x8_t) -> poly8x8_t; - fn vtbx1_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t; - fn vtbx1_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t; - fn vtbx1_p8(a: poly8x8_t, b: poly8x8_t, c: uint8x8_t) -> poly8x8_t; - fn vtbx2_s8(a: int8x8_t, b: int8x8x2_t, c: int8x8_t) -> int8x8_t; - fn vtbx2_u8(a: uint8x8_t, b: uint8x8x2_t, c: uint8x8_t) -> uint8x8_t; - fn vtbx2_p8(a: poly8x8_t, b: poly8x8x2_t, c: uint8x8_t) -> poly8x8_t; - fn vtbx3_s8(a: int8x8_t, b: int8x8x3_t, c: int8x8_t) -> int8x8_t; - fn vtbx3_u8(a: uint8x8_t, b: 
uint8x8x3_t, c: uint8x8_t) -> uint8x8_t; - fn vtbx3_p8(a: poly8x8_t, b: poly8x8x3_t, c: uint8x8_t) -> poly8x8_t; - fn vtbx4_s8(a: int8x8_t, b: int8x8x4_t, c: int8x8_t) -> int8x8_t; - fn vtbx4_u8(a: uint8x8_t, b: uint8x8x4_t, c: uint8x8_t) -> uint8x8_t; - fn vtbx4_p8(a: poly8x8_t, b: poly8x8x4_t, c: uint8x8_t) -> poly8x8_t; - fn vqtbl1_s8(t: int8x16_t, idx: uint8x8_t) -> int8x8_t; - fn vqtbl1q_s8(t: int8x16_t, idx: uint8x16_t) -> int8x16_t; - fn vqtbl1_u8(t: uint8x16_t, idx: uint8x8_t) -> uint8x8_t; - fn vqtbl1q_u8(t: uint8x16_t, idx: uint8x16_t) -> uint8x16_t; - fn vqtbl1_p8(t: poly8x16_t, idx: uint8x8_t) -> poly8x8_t; - fn vqtbl1q_p8(t: poly8x16_t, idx: uint8x16_t) -> poly8x16_t; - fn vqtbx1_s8(a: int8x8_t, t: int8x16_t, idx: uint8x8_t) -> int8x8_t; - fn vqtbx1q_s8(a: int8x16_t, t: int8x16_t, idx: uint8x16_t) -> int8x16_t; - fn vqtbx1_u8(a: uint8x8_t, t: uint8x16_t, idx: uint8x8_t) -> uint8x8_t; - fn vqtbx1q_u8(a: uint8x16_t, t: uint8x16_t, idx: uint8x16_t) -> uint8x16_t; - fn vqtbx1_p8(a: poly8x8_t, t: poly8x16_t, idx: uint8x8_t) -> poly8x8_t; - fn vqtbx1q_p8(a: poly8x16_t, t: poly8x16_t, idx: uint8x16_t) -> poly8x16_t; - fn vqtbl2_s8(t: int8x16x2_t, idx: uint8x8_t) -> int8x8_t; - fn vqtbl2q_s8(t: int8x16x2_t, idx: uint8x16_t) -> int8x16_t; - fn vqtbl2_u8(t: uint8x16x2_t, idx: uint8x8_t) -> uint8x8_t; - fn vqtbl2q_u8(t: uint8x16x2_t, idx: uint8x16_t) -> uint8x16_t; - fn vqtbl2_p8(t: poly8x16x2_t, idx: uint8x8_t) -> poly8x8_t; - fn vqtbl2q_p8(t: poly8x16x2_t, idx: uint8x16_t) -> poly8x16_t; - fn vqtbx2_s8(a: int8x8_t, t: int8x16x2_t, idx: uint8x8_t) -> int8x8_t; - fn vqtbx2q_s8(a: int8x16_t, t: int8x16x2_t, idx: uint8x16_t) -> int8x16_t; - fn vqtbx2_u8(a: uint8x8_t, t: uint8x16x2_t, idx: uint8x8_t) -> uint8x8_t; - fn vqtbx2q_u8(a: uint8x16_t, t: uint8x16x2_t, idx: uint8x16_t) -> uint8x16_t; - fn vqtbx2_p8(a: poly8x8_t, t: poly8x16x2_t, idx: uint8x8_t) -> poly8x8_t; - fn vqtbx2q_p8(a: poly8x16_t, t: poly8x16x2_t, idx: uint8x16_t) -> poly8x16_t; - fn vqtbl3_s8(t: 
int8x16x3_t, idx: uint8x8_t) -> int8x8_t; - fn vqtbl3q_s8(t: int8x16x3_t, idx: uint8x16_t) -> int8x16_t; - fn vqtbl3_u8(t: uint8x16x3_t, idx: uint8x8_t) -> uint8x8_t; - fn vqtbl3q_u8(t: uint8x16x3_t, idx: uint8x16_t) -> uint8x16_t; - fn vqtbl3_p8(t: poly8x16x3_t, idx: uint8x8_t) -> poly8x8_t; - fn vqtbl3q_p8(t: poly8x16x3_t, idx: uint8x16_t) -> poly8x16_t; - fn vqtbx3_s8(a: int8x8_t, t: int8x16x3_t, idx: uint8x8_t) -> int8x8_t; - fn vqtbx3q_s8(a: int8x16_t, t: int8x16x3_t, idx: uint8x16_t) -> int8x16_t; - fn vqtbx3_u8(a: uint8x8_t, t: uint8x16x3_t, idx: uint8x8_t) -> uint8x8_t; - fn vqtbx3q_u8(a: uint8x16_t, t: uint8x16x3_t, idx: uint8x16_t) -> uint8x16_t; - fn vqtbx3_p8(a: poly8x8_t, t: poly8x16x3_t, idx: uint8x8_t) -> poly8x8_t; - fn vqtbx3q_p8(a: poly8x16_t, t: poly8x16x3_t, idx: uint8x16_t) -> poly8x16_t; - fn vqtbl4_s8(t: int8x16x4_t, idx: uint8x8_t) -> int8x8_t; - fn vqtbl4q_s8(t: int8x16x4_t, idx: uint8x16_t) -> int8x16_t; - fn vqtbl4_u8(t: uint8x16x4_t, idx: uint8x8_t) -> uint8x8_t; - fn vqtbl4q_u8(t: uint8x16x4_t, idx: uint8x16_t) -> uint8x16_t; - fn vqtbl4_p8(t: poly8x16x4_t, idx: uint8x8_t) -> poly8x8_t; - fn vqtbl4q_p8(t: poly8x16x4_t, idx: uint8x16_t) -> poly8x16_t; - fn vqtbx4_s8(a: int8x8_t, t: int8x16x4_t, idx: uint8x8_t) -> int8x8_t; - fn vqtbx4q_s8(a: int8x16_t, t: int8x16x4_t, idx: uint8x16_t) -> int8x16_t; - fn vqtbx4_u8(a: uint8x8_t, t: uint8x16x4_t, idx: uint8x8_t) -> uint8x8_t; - fn vqtbx4q_u8(a: uint8x16_t, t: uint8x16x4_t, idx: uint8x16_t) -> uint8x16_t; - fn vqtbx4_p8(a: poly8x8_t, t: poly8x16x4_t, idx: uint8x8_t) -> poly8x8_t; - fn vqtbx4q_p8(a: poly8x16_t, t: poly8x16x4_t, idx: uint8x16_t) -> poly8x16_t; - fn vshld_n_s64(a: i64) -> i64; - fn vshld_n_u64(a: u64) -> u64; - fn vshrd_n_s64(a: i64) -> i64; - fn vshrd_n_u64(a: u64) -> u64; - fn vsrad_n_s64(a: i64, b: i64) -> i64; - fn vsrad_n_u64(a: u64, b: u64) -> u64; - fn vsli_n_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; - fn vsliq_n_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; - fn 
vsli_n_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - fn vsliq_n_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; - fn vsli_n_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; - fn vsliq_n_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; - fn vsli_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t; - fn vsliq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t; - fn vsli_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; - fn vsliq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; - fn vsli_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; - fn vsliq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; - fn vsli_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; - fn vsliq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; - fn vsli_n_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t; - fn vsliq_n_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t; - fn vsli_n_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t; - fn vsliq_n_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t; - fn vsli_n_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t; - fn vsliq_n_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t; - fn vsri_n_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; - fn vsriq_n_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; - fn vsri_n_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - fn vsriq_n_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; - fn vsri_n_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; - fn vsriq_n_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; - fn vsri_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t; - fn vsriq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t; - fn vsri_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; - fn vsriq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; - fn vsri_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; - fn vsriq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; - fn vsri_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; - fn vsriq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; - fn vsri_n_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t; - fn vsriq_n_u64(a: uint64x2_t, b: 
uint64x2_t) -> uint64x2_t; - fn vsri_n_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t; - fn vsriq_n_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t; - fn vsri_n_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t; - fn vsriq_n_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t; - } -} diff --git a/fearless_simd/src/core_arch/fallback.rs b/fearless_simd/src/core_arch/fallback.rs deleted file mode 100644 index 2ef9ee05..00000000 --- a/fearless_simd/src/core_arch/fallback.rs +++ /dev/null @@ -1,16 +0,0 @@ -// Copyright 2025 the Fearless_SIMD Authors -// SPDX-License-Identifier: Apache-2.0 OR MIT - -/// A token for fallback SIMD. -#[derive(Clone, Copy, Debug)] -pub struct Fallback { - _private: (), -} - -impl Fallback { - /// Create a SIMD token. - #[inline] - pub const fn new() -> Self { - Self { _private: () } - } -} diff --git a/fearless_simd/src/core_arch/mod.rs b/fearless_simd/src/core_arch/mod.rs deleted file mode 100644 index 51599cd5..00000000 --- a/fearless_simd/src/core_arch/mod.rs +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright 2024 the Fearless_SIMD Authors -// SPDX-License-Identifier: Apache-2.0 OR MIT - -//! Access to architecture-specific intrinsics. - -#![expect( - missing_docs, - clippy::new_without_default, - reason = "TODO: https://github.com/linebender/fearless_simd/issues/40" -)] - -#[cfg(target_arch = "aarch64")] -pub mod aarch64; - -pub mod fallback; -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -pub mod x86; - -#[cfg(all(target_arch = "wasm32", target_feature = "simd128"))] -pub mod wasm32; diff --git a/fearless_simd/src/core_arch/wasm32/mod.rs b/fearless_simd/src/core_arch/wasm32/mod.rs deleted file mode 100644 index 4636eeeb..00000000 --- a/fearless_simd/src/core_arch/wasm32/mod.rs +++ /dev/null @@ -1,18 +0,0 @@ -// Copyright 2025 the Fearless_SIMD Authors -// SPDX-License-Identifier: Apache-2.0 OR MIT - -/// A token for WASM SIMD128. 
-#[derive(Clone, Copy, Debug)] -pub struct WasmSimd128 { - _private: (), -} - -// There is intentionally no method delegation here because all the WASM SIMD128 methods are enabled or disabled -// statically--there is no feature detection. -impl WasmSimd128 { - /// Create a SIMD token. - #[inline] - pub const fn new() -> Self { - Self { _private: () } - } -} diff --git a/fearless_simd/src/core_arch/x86/avx.rs b/fearless_simd/src/core_arch/x86/avx.rs deleted file mode 100644 index 99c953f7..00000000 --- a/fearless_simd/src/core_arch/x86/avx.rs +++ /dev/null @@ -1,340 +0,0 @@ -// Copyright 2024 the Fearless_SIMD Authors -// SPDX-License-Identifier: Apache-2.0 OR MIT - -//! Access to AVX intrinsics. - -use crate::impl_macros::delegate; -#[cfg(target_arch = "x86")] -use core::arch::x86 as arch; -#[cfg(target_arch = "x86_64")] -use core::arch::x86_64 as arch; - -use arch::*; - -/// A token for AVX intrinsics on `x86` and `x86_64`. -#[derive(Clone, Copy, Debug)] -pub struct Avx { - _private: (), -} - -#[expect( - clippy::missing_safety_doc, - reason = "TODO: https://github.com/linebender/fearless_simd/issues/40" -)] -impl Avx { - /// Create a SIMD token. - /// - /// # Safety - /// - /// The required CPU features must be available. - #[inline] - pub const unsafe fn new_unchecked() -> Self { - Self { _private: () } - } - - delegate! 
{ arch: - fn _mm256_add_pd(a: __m256d, b: __m256d) -> __m256d; - fn _mm256_add_ps(a: __m256, b: __m256) -> __m256; - fn _mm256_and_pd(a: __m256d, b: __m256d) -> __m256d; - fn _mm256_and_ps(a: __m256, b: __m256) -> __m256; - fn _mm256_or_pd(a: __m256d, b: __m256d) -> __m256d; - fn _mm256_or_ps(a: __m256, b: __m256) -> __m256; - fn _mm256_shuffle_pd(a: __m256d, b: __m256d) -> __m256d; - fn _mm256_shuffle_ps(a: __m256, b: __m256) -> __m256; - fn _mm256_andnot_pd(a: __m256d, b: __m256d) -> __m256d; - fn _mm256_andnot_ps(a: __m256, b: __m256) -> __m256; - fn _mm256_max_pd(a: __m256d, b: __m256d) -> __m256d; - fn _mm256_max_ps(a: __m256, b: __m256) -> __m256; - fn _mm256_min_pd(a: __m256d, b: __m256d) -> __m256d; - fn _mm256_min_ps(a: __m256, b: __m256) -> __m256; - fn _mm256_mul_pd(a: __m256d, b: __m256d) -> __m256d; - fn _mm256_mul_ps(a: __m256, b: __m256) -> __m256; - fn _mm256_addsub_pd(a: __m256d, b: __m256d) -> __m256d; - fn _mm256_addsub_ps(a: __m256, b: __m256) -> __m256; - fn _mm256_sub_pd(a: __m256d, b: __m256d) -> __m256d; - fn _mm256_sub_ps(a: __m256, b: __m256) -> __m256; - fn _mm256_div_ps(a: __m256, b: __m256) -> __m256; - fn _mm256_div_pd(a: __m256d, b: __m256d) -> __m256d; - fn _mm256_round_pd(a: __m256d) -> __m256d; - fn _mm256_ceil_pd(a: __m256d) -> __m256d; - fn _mm256_floor_pd(a: __m256d) -> __m256d; - fn _mm256_round_ps(a: __m256) -> __m256; - fn _mm256_ceil_ps(a: __m256) -> __m256; - fn _mm256_floor_ps(a: __m256) -> __m256; - fn _mm256_sqrt_ps(a: __m256) -> __m256; - fn _mm256_sqrt_pd(a: __m256d) -> __m256d; - fn _mm256_blend_pd(a: __m256d, b: __m256d) -> __m256d; - fn _mm256_blend_ps(a: __m256, b: __m256) -> __m256; - fn _mm256_blendv_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d; - fn _mm256_blendv_ps(a: __m256, b: __m256, c: __m256) -> __m256; - fn _mm256_dp_ps(a: __m256, b: __m256) -> __m256; - fn _mm256_hadd_pd(a: __m256d, b: __m256d) -> __m256d; - fn _mm256_hadd_ps(a: __m256, b: __m256) -> __m256; - fn _mm256_hsub_pd(a: __m256d, b: 
__m256d) -> __m256d; - fn _mm256_hsub_ps(a: __m256, b: __m256) -> __m256; - fn _mm256_xor_pd(a: __m256d, b: __m256d) -> __m256d; - fn _mm256_xor_ps(a: __m256, b: __m256) -> __m256; - fn _mm_cmp_pd(a: __m128d, b: __m128d) -> __m128d; - fn _mm256_cmp_pd(a: __m256d, b: __m256d) -> __m256d; - fn _mm_cmp_ps(a: __m128, b: __m128) -> __m128; - fn _mm256_cmp_ps(a: __m256, b: __m256) -> __m256; - fn _mm_cmp_sd(a: __m128d, b: __m128d) -> __m128d; - fn _mm_cmp_ss(a: __m128, b: __m128) -> __m128; - fn _mm256_cvtepi32_pd(a: __m128i) -> __m256d; - fn _mm256_cvtepi32_ps(a: __m256i) -> __m256; - fn _mm256_cvtpd_ps(a: __m256d) -> __m128; - fn _mm256_cvtps_epi32(a: __m256) -> __m256i; - fn _mm256_cvtps_pd(a: __m128) -> __m256d; - fn _mm256_cvttpd_epi32(a: __m256d) -> __m128i; - fn _mm256_cvtpd_epi32(a: __m256d) -> __m128i; - fn _mm256_cvttps_epi32(a: __m256) -> __m256i; - fn _mm256_extractf128_ps(a: __m256) -> __m128; - fn _mm256_extractf128_pd(a: __m256d) -> __m128d; - fn _mm256_extractf128_si256(a: __m256i) -> __m128i; - fn _mm256_zeroall(); - fn _mm256_zeroupper(); - fn _mm256_permutevar_ps(a: __m256, b: __m256i) -> __m256; - fn _mm_permutevar_ps(a: __m128, b: __m128i) -> __m128; - fn _mm256_permute_ps(a: __m256) -> __m256; - fn _mm_permute_ps(a: __m128) -> __m128; - fn _mm256_permutevar_pd(a: __m256d, b: __m256i) -> __m256d; - fn _mm_permutevar_pd(a: __m128d, b: __m128i) -> __m128d; - fn _mm256_permute_pd(a: __m256d) -> __m256d; - fn _mm_permute_pd(a: __m128d) -> __m128d; - fn _mm256_permute2f128_ps(a: __m256, b: __m256) -> __m256; - fn _mm256_permute2f128_pd(a: __m256d, b: __m256d) -> __m256d; - fn _mm256_permute2f128_si256(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_broadcast_ss(f: &f32) -> __m256; - fn _mm_broadcast_ss(f: &f32) -> __m128; - fn _mm256_broadcast_sd(f: &f64) -> __m256d; - fn _mm256_broadcast_ps(a: &__m128) -> __m256; - fn _mm256_broadcast_pd(a: &__m128d) -> __m256d; - fn _mm256_insertf128_ps(a: __m256, b: __m128) -> __m256; - fn _mm256_insertf128_pd(a: 
__m256d, b: __m128d) -> __m256d; - fn _mm256_insertf128_si256(a: __m256i, b: __m128i) -> __m256i; - fn _mm256_insert_epi8(a: __m256i, i: i8) -> __m256i; - fn _mm256_insert_epi16(a: __m256i, i: i16) -> __m256i; - fn _mm256_insert_epi32(a: __m256i, i: i32) -> __m256i; - unsafe fn _mm256_load_pd(mem_addr: *const f64) -> __m256d; - unsafe fn _mm256_store_pd(mem_addr: *mut f64, a: __m256d); - unsafe fn _mm256_load_ps(mem_addr: *const f32) -> __m256; - unsafe fn _mm256_store_ps(mem_addr: *mut f32, a: __m256); - unsafe fn _mm256_loadu_pd(mem_addr: *const f64) -> __m256d; - unsafe fn _mm256_storeu_pd(mem_addr: *mut f64, a: __m256d); - unsafe fn _mm256_loadu_ps(mem_addr: *const f32) -> __m256; - unsafe fn _mm256_storeu_ps(mem_addr: *mut f32, a: __m256); - unsafe fn _mm256_load_si256(mem_addr: *const __m256i) -> __m256i; - unsafe fn _mm256_store_si256(mem_addr: *mut __m256i, a: __m256i); - unsafe fn _mm256_loadu_si256(mem_addr: *const __m256i) -> __m256i; - unsafe fn _mm256_storeu_si256(mem_addr: *mut __m256i, a: __m256i); - unsafe fn _mm256_maskload_pd(mem_addr: *const f64, mask: __m256i) -> __m256d; - unsafe fn _mm256_maskstore_pd(mem_addr: *mut f64, mask: __m256i, a: __m256d); - unsafe fn _mm_maskload_pd(mem_addr: *const f64, mask: __m128i) -> __m128d; - unsafe fn _mm_maskstore_pd(mem_addr: *mut f64, mask: __m128i, a: __m128d); - unsafe fn _mm256_maskload_ps(mem_addr: *const f32, mask: __m256i) -> __m256; - unsafe fn _mm256_maskstore_ps(mem_addr: *mut f32, mask: __m256i, a: __m256); - unsafe fn _mm_maskload_ps(mem_addr: *const f32, mask: __m128i) -> __m128; - unsafe fn _mm_maskstore_ps(mem_addr: *mut f32, mask: __m128i, a: __m128); - fn _mm256_movehdup_ps(a: __m256) -> __m256; - fn _mm256_moveldup_ps(a: __m256) -> __m256; - fn _mm256_movedup_pd(a: __m256d) -> __m256d; - unsafe fn _mm256_lddqu_si256(mem_addr: *const __m256i) -> __m256i; - unsafe fn _mm256_stream_si256(mem_addr: *mut __m256i, a: __m256i); - unsafe fn _mm256_stream_pd(mem_addr: *mut f64, a: __m256d); - 
unsafe fn _mm256_stream_ps(mem_addr: *mut f32, a: __m256); - fn _mm256_rcp_ps(a: __m256) -> __m256; - fn _mm256_rsqrt_ps(a: __m256) -> __m256; - fn _mm256_unpackhi_pd(a: __m256d, b: __m256d) -> __m256d; - fn _mm256_unpackhi_ps(a: __m256, b: __m256) -> __m256; - fn _mm256_unpacklo_pd(a: __m256d, b: __m256d) -> __m256d; - fn _mm256_unpacklo_ps(a: __m256, b: __m256) -> __m256; - fn _mm256_testz_si256(a: __m256i, b: __m256i) -> i32; - fn _mm256_testc_si256(a: __m256i, b: __m256i) -> i32; - fn _mm256_testnzc_si256(a: __m256i, b: __m256i) -> i32; - fn _mm256_testz_pd(a: __m256d, b: __m256d) -> i32; - fn _mm256_testc_pd(a: __m256d, b: __m256d) -> i32; - fn _mm256_testnzc_pd(a: __m256d, b: __m256d) -> i32; - fn _mm_testz_pd(a: __m128d, b: __m128d) -> i32; - fn _mm_testc_pd(a: __m128d, b: __m128d) -> i32; - fn _mm_testnzc_pd(a: __m128d, b: __m128d) -> i32; - fn _mm256_testz_ps(a: __m256, b: __m256) -> i32; - fn _mm256_testc_ps(a: __m256, b: __m256) -> i32; - fn _mm256_testnzc_ps(a: __m256, b: __m256) -> i32; - fn _mm_testz_ps(a: __m128, b: __m128) -> i32; - fn _mm_testc_ps(a: __m128, b: __m128) -> i32; - fn _mm_testnzc_ps(a: __m128, b: __m128) -> i32; - fn _mm256_movemask_pd(a: __m256d) -> i32; - fn _mm256_movemask_ps(a: __m256) -> i32; - fn _mm256_setzero_pd() -> __m256d; - fn _mm256_setzero_ps() -> __m256; - fn _mm256_setzero_si256() -> __m256i; - fn _mm256_set_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d; - fn _mm256_set_ps(a: f32, b: f32, c: f32, d: f32, e: f32, f: f32, g: f32, h: f32) -> __m256; - fn _mm256_set_epi8( - e00: i8, - e01: i8, - e02: i8, - e03: i8, - e04: i8, - e05: i8, - e06: i8, - e07: i8, - e08: i8, - e09: i8, - e10: i8, - e11: i8, - e12: i8, - e13: i8, - e14: i8, - e15: i8, - e16: i8, - e17: i8, - e18: i8, - e19: i8, - e20: i8, - e21: i8, - e22: i8, - e23: i8, - e24: i8, - e25: i8, - e26: i8, - e27: i8, - e28: i8, - e29: i8, - e30: i8, - e31: i8, - ) -> __m256i; - fn _mm256_set_epi16( - e00: i16, - e01: i16, - e02: i16, - e03: i16, - e04: i16, - e05: 
i16, - e06: i16, - e07: i16, - e08: i16, - e09: i16, - e10: i16, - e11: i16, - e12: i16, - e13: i16, - e14: i16, - e15: i16, - ) -> __m256i; - fn _mm256_set_epi32( - e0: i32, - e1: i32, - e2: i32, - e3: i32, - e4: i32, - e5: i32, - e6: i32, - e7: i32, - ) -> __m256i; - fn _mm256_set_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i; - fn _mm256_setr_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d; - fn _mm256_setr_ps(a: f32, b: f32, c: f32, d: f32, e: f32, f: f32, g: f32, h: f32) - -> __m256; - fn _mm256_setr_epi8( - e00: i8, - e01: i8, - e02: i8, - e03: i8, - e04: i8, - e05: i8, - e06: i8, - e07: i8, - e08: i8, - e09: i8, - e10: i8, - e11: i8, - e12: i8, - e13: i8, - e14: i8, - e15: i8, - e16: i8, - e17: i8, - e18: i8, - e19: i8, - e20: i8, - e21: i8, - e22: i8, - e23: i8, - e24: i8, - e25: i8, - e26: i8, - e27: i8, - e28: i8, - e29: i8, - e30: i8, - e31: i8, - ) -> __m256i; - fn _mm256_setr_epi16( - e00: i16, - e01: i16, - e02: i16, - e03: i16, - e04: i16, - e05: i16, - e06: i16, - e07: i16, - e08: i16, - e09: i16, - e10: i16, - e11: i16, - e12: i16, - e13: i16, - e14: i16, - e15: i16, - ) -> __m256i; - fn _mm256_setr_epi32( - e0: i32, - e1: i32, - e2: i32, - e3: i32, - e4: i32, - e5: i32, - e6: i32, - e7: i32, - ) -> __m256i; - fn _mm256_setr_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i; - fn _mm256_set1_pd(a: f64) -> __m256d; - fn _mm256_set1_ps(a: f32) -> __m256; - fn _mm256_set1_epi8(a: i8) -> __m256i; - fn _mm256_set1_epi16(a: i16) -> __m256i; - fn _mm256_set1_epi32(a: i32) -> __m256i; - fn _mm256_set1_epi64x(a: i64) -> __m256i; - fn _mm256_castpd_ps(a: __m256d) -> __m256; - fn _mm256_castps_pd(a: __m256) -> __m256d; - fn _mm256_castps_si256(a: __m256) -> __m256i; - fn _mm256_castsi256_ps(a: __m256i) -> __m256; - fn _mm256_castpd_si256(a: __m256d) -> __m256i; - fn _mm256_castsi256_pd(a: __m256i) -> __m256d; - fn _mm256_castps256_ps128(a: __m256) -> __m128; - fn _mm256_castpd256_pd128(a: __m256d) -> __m128d; - fn _mm256_castsi256_si128(a: __m256i) -> 
__m128i; - fn _mm256_castps128_ps256(a: __m128) -> __m256; - fn _mm256_castpd128_pd256(a: __m128d) -> __m256d; - fn _mm256_castsi128_si256(a: __m128i) -> __m256i; - fn _mm256_zextps128_ps256(a: __m128) -> __m256; - fn _mm256_zextsi128_si256(a: __m128i) -> __m256i; - fn _mm256_zextpd128_pd256(a: __m128d) -> __m256d; - fn _mm256_undefined_ps() -> __m256; - fn _mm256_undefined_pd() -> __m256d; - fn _mm256_undefined_si256() -> __m256i; - fn _mm256_set_m128(hi: __m128, lo: __m128) -> __m256; - fn _mm256_set_m128d(hi: __m128d, lo: __m128d) -> __m256d; - fn _mm256_set_m128i(hi: __m128i, lo: __m128i) -> __m256i; - fn _mm256_setr_m128(lo: __m128, hi: __m128) -> __m256; - fn _mm256_setr_m128d(lo: __m128d, hi: __m128d) -> __m256d; - fn _mm256_setr_m128i(lo: __m128i, hi: __m128i) -> __m256i; - unsafe fn _mm256_loadu2_m128(hiaddr: *const f32, loaddr: *const f32) -> __m256; - unsafe fn _mm256_loadu2_m128d(hiaddr: *const f64, loaddr: *const f64) -> __m256d; - unsafe fn _mm256_loadu2_m128i(hiaddr: *const __m128i, loaddr: *const __m128i) -> __m256i; - unsafe fn _mm256_storeu2_m128(hiaddr: *mut f32, loaddr: *mut f32, a: __m256); - unsafe fn _mm256_storeu2_m128d(hiaddr: *mut f64, loaddr: *mut f64, a: __m256d); - unsafe fn _mm256_storeu2_m128i(hiaddr: *mut __m128i, loaddr: *mut __m128i, a: __m256i); - fn _mm256_cvtss_f32(a: __m256) -> f32; - } -} diff --git a/fearless_simd/src/core_arch/x86/avx2.rs b/fearless_simd/src/core_arch/x86/avx2.rs deleted file mode 100644 index 01f8e76c..00000000 --- a/fearless_simd/src/core_arch/x86/avx2.rs +++ /dev/null @@ -1,355 +0,0 @@ -// Copyright 2024 the Fearless_SIMD Authors -// SPDX-License-Identifier: Apache-2.0 OR MIT - -#![expect( - clippy::missing_safety_doc, - reason = "TODO: https://github.com/linebender/fearless_simd/issues/40" -)] - -//! Access to AVX2 intrinsics. 
- -use crate::impl_macros::delegate; -#[cfg(target_arch = "x86")] -use core::arch::x86 as arch; -#[cfg(target_arch = "x86_64")] -use core::arch::x86_64 as arch; - -use arch::*; - -/// A token for AVX2 intrinsics on `x86` and `x86_64`. -#[derive(Clone, Copy, Debug)] -pub struct Avx2 { - _private: (), -} - -impl Avx2 { - /// Create a SIMD token. - /// - /// # Safety - /// - /// The required CPU features must be available. - pub const unsafe fn new_unchecked() -> Self { - Self { _private: () } - } - - delegate! { arch: - fn _mm256_abs_epi32(a: __m256i) -> __m256i; - fn _mm256_abs_epi16(a: __m256i) -> __m256i; - fn _mm256_abs_epi8(a: __m256i) -> __m256i; - fn _mm256_add_epi64(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_add_epi32(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_add_epi16(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_add_epi8(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_adds_epi8(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_adds_epi16(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_adds_epu8(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_adds_epu16(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_alignr_epi8(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_and_si256(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_andnot_si256(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_avg_epu16(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_avg_epu8(a: __m256i, b: __m256i) -> __m256i; - fn _mm_blend_epi32(a: __m128i, b: __m128i) -> __m128i; - fn _mm256_blend_epi32(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_blend_epi16(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_blendv_epi8(a: __m256i, b: __m256i, mask: __m256i) -> __m256i; - fn _mm_broadcastb_epi8(a: __m128i) -> __m128i; - fn _mm256_broadcastb_epi8(a: __m128i) -> __m256i; - fn _mm_broadcastd_epi32(a: __m128i) -> __m128i; - fn _mm256_broadcastd_epi32(a: __m128i) -> __m256i; - fn _mm_broadcastq_epi64(a: __m128i) -> __m128i; - fn _mm256_broadcastq_epi64(a: __m128i) -> __m256i; - fn 
_mm_broadcastsd_pd(a: __m128d) -> __m128d; - fn _mm256_broadcastsd_pd(a: __m128d) -> __m256d; - fn _mm256_broadcastsi128_si256(a: __m128i) -> __m256i; - fn _mm_broadcastss_ps(a: __m128) -> __m128; - fn _mm256_broadcastss_ps(a: __m128) -> __m256; - fn _mm_broadcastw_epi16(a: __m128i) -> __m128i; - fn _mm256_broadcastw_epi16(a: __m128i) -> __m256i; - fn _mm256_cmpeq_epi64(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_cmpeq_epi32(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_cmpeq_epi16(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_cmpeq_epi8(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_cmpgt_epi64(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_cmpgt_epi32(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_cmpgt_epi16(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_cmpgt_epi8(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_cvtepi16_epi32(a: __m128i) -> __m256i; - fn _mm256_cvtepi16_epi64(a: __m128i) -> __m256i; - fn _mm256_cvtepi32_epi64(a: __m128i) -> __m256i; - fn _mm256_cvtepi8_epi16(a: __m128i) -> __m256i; - fn _mm256_cvtepi8_epi32(a: __m128i) -> __m256i; - fn _mm256_cvtepi8_epi64(a: __m128i) -> __m256i; - fn _mm256_cvtepu16_epi32(a: __m128i) -> __m256i; - fn _mm256_cvtepu16_epi64(a: __m128i) -> __m256i; - fn _mm256_cvtepu32_epi64(a: __m128i) -> __m256i; - fn _mm256_cvtepu8_epi16(a: __m128i) -> __m256i; - fn _mm256_cvtepu8_epi32(a: __m128i) -> __m256i; - fn _mm256_cvtepu8_epi64(a: __m128i) -> __m256i; - fn _mm256_extracti128_si256(a: __m256i) -> __m128i; - fn _mm256_hadd_epi16(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_hadd_epi32(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_hadds_epi16(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_hsub_epi16(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_hsub_epi32(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_hsubs_epi16(a: __m256i, b: __m256i) -> __m256i; - unsafe fn _mm_i32gather_epi32( - slice: *const i32, - offsets: __m128i, - ) -> __m128i; - unsafe fn _mm_mask_i32gather_epi32( - src: __m128i, - slice: 
*const i32, - offsets: __m128i, - mask: __m128i, - ) -> __m128i; - unsafe fn _mm256_i32gather_epi32( - slice: *const i32, - offsets: __m256i, - ) -> __m256i; - unsafe fn _mm256_mask_i32gather_epi32( - src: __m256i, - slice: *const i32, - offsets: __m256i, - mask: __m256i, - ) -> __m256i; - unsafe fn _mm_i32gather_ps(slice: *const f32, offsets: __m128i) - -> __m128; - unsafe fn _mm_mask_i32gather_ps( - src: __m128, - slice: *const f32, - offsets: __m128i, - mask: __m128, - ) -> __m128; - unsafe fn _mm256_i32gather_ps( - slice: *const f32, - offsets: __m256i, - ) -> __m256; - unsafe fn _mm256_mask_i32gather_ps( - src: __m256, - slice: *const f32, - offsets: __m256i, - mask: __m256, - ) -> __m256; - unsafe fn _mm_i32gather_epi64( - slice: *const i64, - offsets: __m128i, - ) -> __m128i; - unsafe fn _mm_mask_i32gather_epi64( - src: __m128i, - slice: *const i64, - offsets: __m128i, - mask: __m128i, - ) -> __m128i; - unsafe fn _mm256_i32gather_epi64( - slice: *const i64, - offsets: __m128i, - ) -> __m256i; - unsafe fn _mm256_mask_i32gather_epi64( - src: __m256i, - slice: *const i64, - offsets: __m128i, - mask: __m256i, - ) -> __m256i; - unsafe fn _mm_i32gather_pd( - slice: *const f64, - offsets: __m128i, - ) -> __m128d; - unsafe fn _mm_mask_i32gather_pd( - src: __m128d, - slice: *const f64, - offsets: __m128i, - mask: __m128d, - ) -> __m128d; - unsafe fn _mm256_i32gather_pd( - slice: *const f64, - offsets: __m128i, - ) -> __m256d; - unsafe fn _mm256_mask_i32gather_pd( - src: __m256d, - slice: *const f64, - offsets: __m128i, - mask: __m256d, - ) -> __m256d; - unsafe fn _mm_i64gather_epi32( - slice: *const i32, - offsets: __m128i, - ) -> __m128i; - unsafe fn _mm_mask_i64gather_epi32( - src: __m128i, - slice: *const i32, - offsets: __m128i, - mask: __m128i, - ) -> __m128i; - unsafe fn _mm256_i64gather_epi32( - slice: *const i32, - offsets: __m256i, - ) -> __m128i; - unsafe fn _mm256_mask_i64gather_epi32( - src: __m128i, - slice: *const i32, - offsets: __m256i, - mask: 
__m128i, - ) -> __m128i; - unsafe fn _mm_i64gather_ps(slice: *const f32, offsets: __m128i) - -> __m128; - unsafe fn _mm_mask_i64gather_ps( - src: __m128, - slice: *const f32, - offsets: __m128i, - mask: __m128, - ) -> __m128; - unsafe fn _mm256_i64gather_ps( - slice: *const f32, - offsets: __m256i, - ) -> __m128; - unsafe fn _mm256_mask_i64gather_ps( - src: __m128, - slice: *const f32, - offsets: __m256i, - mask: __m128, - ) -> __m128; - unsafe fn _mm_i64gather_epi64( - slice: *const i64, - offsets: __m128i, - ) -> __m128i; - unsafe fn _mm_mask_i64gather_epi64( - src: __m128i, - slice: *const i64, - offsets: __m128i, - mask: __m128i, - ) -> __m128i; - unsafe fn _mm256_i64gather_epi64( - slice: *const i64, - offsets: __m256i, - ) -> __m256i; - unsafe fn _mm256_mask_i64gather_epi64( - src: __m256i, - slice: *const i64, - offsets: __m256i, - mask: __m256i, - ) -> __m256i; - unsafe fn _mm_i64gather_pd( - slice: *const f64, - offsets: __m128i, - ) -> __m128d; - unsafe fn _mm_mask_i64gather_pd( - src: __m128d, - slice: *const f64, - offsets: __m128i, - mask: __m128d, - ) -> __m128d; - unsafe fn _mm256_i64gather_pd( - slice: *const f64, - offsets: __m256i, - ) -> __m256d; - unsafe fn _mm256_mask_i64gather_pd( - src: __m256d, - slice: *const f64, - offsets: __m256i, - mask: __m256d, - ) -> __m256d; - fn _mm256_inserti128_si256(a: __m256i, b: __m128i) -> __m256i; - fn _mm256_madd_epi16(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_maddubs_epi16(a: __m256i, b: __m256i) -> __m256i; - unsafe fn _mm_maskload_epi32(mem_addr: *const i32, mask: __m128i) -> __m128i; - unsafe fn _mm256_maskload_epi32(mem_addr: *const i32, mask: __m256i) -> __m256i; - unsafe fn _mm_maskload_epi64(mem_addr: *const i64, mask: __m128i) -> __m128i; - unsafe fn _mm256_maskload_epi64(mem_addr: *const i64, mask: __m256i) -> __m256i; - unsafe fn _mm_maskstore_epi32(mem_addr: *mut i32, mask: __m128i, a: __m128i); - unsafe fn _mm256_maskstore_epi32(mem_addr: *mut i32, mask: __m256i, a: __m256i); - unsafe fn 
_mm_maskstore_epi64(mem_addr: *mut i64, mask: __m128i, a: __m128i); - unsafe fn _mm256_maskstore_epi64(mem_addr: *mut i64, mask: __m256i, a: __m256i); - fn _mm256_max_epi16(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_max_epi32(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_max_epi8(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_max_epu16(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_max_epu32(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_max_epu8(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_min_epi16(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_min_epi32(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_min_epi8(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_min_epu16(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_min_epu32(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_min_epu8(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_movemask_epi8(a: __m256i) -> i32; - fn _mm256_mpsadbw_epu8(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_mul_epi32(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_mul_epu32(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_mulhi_epi16(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_mulhi_epu16(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_mullo_epi16(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_mullo_epi32(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_mulhrs_epi16(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_or_si256(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_packs_epi16(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_packs_epi32(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_packus_epi16(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_packus_epi32(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_permutevar8x32_epi32(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_permute4x64_epi64(a: __m256i) -> __m256i; - fn _mm256_permute2x128_si256(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_permute4x64_pd(a: __m256d) -> __m256d; - fn _mm256_permutevar8x32_ps(a: __m256, idx: __m256i) -> __m256; - fn _mm256_sad_epu8(a: 
__m256i, b: __m256i) -> __m256i; - fn _mm256_shuffle_epi8(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_shuffle_epi32(a: __m256i) -> __m256i; - fn _mm256_shufflehi_epi16(a: __m256i) -> __m256i; - fn _mm256_shufflelo_epi16(a: __m256i) -> __m256i; - fn _mm256_sign_epi16(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_sign_epi32(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_sign_epi8(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_sll_epi16(a: __m256i, count: __m128i) -> __m256i; - fn _mm256_sll_epi32(a: __m256i, count: __m128i) -> __m256i; - fn _mm256_sll_epi64(a: __m256i, count: __m128i) -> __m256i; - fn _mm256_slli_epi16(a: __m256i) -> __m256i; - fn _mm256_slli_epi32(a: __m256i) -> __m256i; - fn _mm256_slli_epi64(a: __m256i) -> __m256i; - fn _mm256_slli_si256(a: __m256i) -> __m256i; - fn _mm256_bslli_epi128(a: __m256i) -> __m256i; - fn _mm_sllv_epi32(a: __m128i, count: __m128i) -> __m128i; - fn _mm256_sllv_epi32(a: __m256i, count: __m256i) -> __m256i; - fn _mm_sllv_epi64(a: __m128i, count: __m128i) -> __m128i; - fn _mm256_sllv_epi64(a: __m256i, count: __m256i) -> __m256i; - fn _mm256_sra_epi16(a: __m256i, count: __m128i) -> __m256i; - fn _mm256_sra_epi32(a: __m256i, count: __m128i) -> __m256i; - fn _mm256_srai_epi16(a: __m256i) -> __m256i; - fn _mm256_srai_epi32(a: __m256i) -> __m256i; - fn _mm_srav_epi32(a: __m128i, count: __m128i) -> __m128i; - fn _mm256_srav_epi32(a: __m256i, count: __m256i) -> __m256i; - fn _mm256_srli_si256(a: __m256i) -> __m256i; - fn _mm256_bsrli_epi128(a: __m256i) -> __m256i; - fn _mm256_srl_epi16(a: __m256i, count: __m128i) -> __m256i; - fn _mm256_srl_epi32(a: __m256i, count: __m128i) -> __m256i; - fn _mm256_srl_epi64(a: __m256i, count: __m128i) -> __m256i; - fn _mm256_srli_epi16(a: __m256i) -> __m256i; - fn _mm256_srli_epi32(a: __m256i) -> __m256i; - fn _mm256_srli_epi64(a: __m256i) -> __m256i; - fn _mm_srlv_epi32(a: __m128i, count: __m128i) -> __m128i; - fn _mm256_srlv_epi32(a: __m256i, count: __m256i) -> __m256i; - fn 
_mm_srlv_epi64(a: __m128i, count: __m128i) -> __m128i; - fn _mm256_srlv_epi64(a: __m256i, count: __m256i) -> __m256i; - fn _mm256_sub_epi16(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_sub_epi32(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_sub_epi64(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_sub_epi8(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_subs_epi16(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_subs_epi8(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_subs_epu16(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_subs_epu8(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_unpackhi_epi8(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_unpacklo_epi8(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_unpackhi_epi16(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_unpacklo_epi16(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_unpackhi_epi32(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_unpacklo_epi32(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_unpackhi_epi64(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_unpacklo_epi64(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_xor_si256(a: __m256i, b: __m256i) -> __m256i; - fn _mm256_extract_epi8(a: __m256i) -> i32; - fn _mm256_extract_epi16(a: __m256i) -> i32; - fn _mm256_extract_epi32(a: __m256i) -> i32; - fn _mm256_cvtsd_f64(a: __m256d) -> f64; - fn _mm256_cvtsi256_si32(a: __m256i) -> i32; - } -} diff --git a/fearless_simd/src/core_arch/x86/fma.rs b/fearless_simd/src/core_arch/x86/fma.rs deleted file mode 100644 index 2117b65b..00000000 --- a/fearless_simd/src/core_arch/x86/fma.rs +++ /dev/null @@ -1,65 +0,0 @@ -// Copyright 2024 the Fearless_SIMD Authors -// SPDX-License-Identifier: Apache-2.0 OR MIT - -//! Access to FMA intrinsics. - -use crate::impl_macros::delegate; -#[cfg(target_arch = "x86")] -use core::arch::x86 as arch; -#[cfg(target_arch = "x86_64")] -use core::arch::x86_64 as arch; - -use arch::*; - -/// A token for FMA intrinsics on `x86` and `x86_64`. 
-#[derive(Clone, Copy, Debug)] -pub struct Fma { - _private: (), -} - -impl Fma { - /// Create a SIMD token. - /// - /// # Safety - /// - /// The required CPU features must be available. - #[inline] - pub const unsafe fn new_unchecked() -> Self { - Self { _private: () } - } - - delegate! { arch: - fn _mm_fmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d; - fn _mm256_fmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d; - fn _mm_fmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128; - fn _mm256_fmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256; - fn _mm_fmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d; - fn _mm_fmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128; - fn _mm_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d; - fn _mm256_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d; - fn _mm_fmaddsub_ps(a: __m128, b: __m128, c: __m128) -> __m128; - fn _mm256_fmaddsub_ps(a: __m256, b: __m256, c: __m256) -> __m256; - fn _mm_fmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d; - fn _mm256_fmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d; - fn _mm_fmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128; - fn _mm256_fmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256; - fn _mm_fmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d; - fn _mm_fmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128; - fn _mm_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d; - fn _mm256_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d; - fn _mm_fmsubadd_ps(a: __m128, b: __m128, c: __m128) -> __m128; - fn _mm256_fmsubadd_ps(a: __m256, b: __m256, c: __m256) -> __m256; - fn _mm_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d; - fn _mm256_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d; - fn _mm_fnmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128; - fn _mm256_fnmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256; - fn _mm_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d; - fn _mm_fnmadd_ss(a: __m128, b: __m128, 
c: __m128) -> __m128; - fn _mm_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d; - fn _mm256_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d; - fn _mm_fnmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128; - fn _mm256_fnmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256; - fn _mm_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d; - fn _mm_fnmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128; - } -} diff --git a/fearless_simd/src/core_arch/x86/mod.rs b/fearless_simd/src/core_arch/x86/mod.rs deleted file mode 100644 index 14f9a2a1..00000000 --- a/fearless_simd/src/core_arch/x86/mod.rs +++ /dev/null @@ -1,24 +0,0 @@ -// Copyright 2024 the Fearless_SIMD Authors -// SPDX-License-Identifier: Apache-2.0 OR MIT - -//! Access to intrinsics on `x86` and `x86_64`. - -mod avx; -mod avx2; -mod fma; -mod sse; -mod sse2; -mod sse3; -mod sse4_1; -mod sse4_2; -mod ssse3; - -pub use avx::Avx; -pub use avx2::Avx2; -pub use fma::Fma; -pub use sse::Sse; -pub use sse2::Sse2; -pub use sse3::Sse3; -pub use sse4_1::Sse4_1; -pub use sse4_2::Sse4_2; -pub use ssse3::Ssse3; diff --git a/fearless_simd/src/core_arch/x86/sse.rs b/fearless_simd/src/core_arch/x86/sse.rs deleted file mode 100644 index 09eeb953..00000000 --- a/fearless_simd/src/core_arch/x86/sse.rs +++ /dev/null @@ -1,140 +0,0 @@ -// Copyright 2024 the Fearless_SIMD Authors -// SPDX-License-Identifier: Apache-2.0 OR MIT - -//! Access to SSE intrinsics. - -use crate::impl_macros::delegate; -#[cfg(target_arch = "x86")] -use core::arch::x86 as arch; -#[cfg(target_arch = "x86_64")] -use core::arch::x86_64 as arch; - -use arch::*; - -/// A token for SSE intrinsics on `x86` and `x86_64`. -#[derive(Clone, Copy, Debug)] -pub struct Sse { - _private: (), -} - -#[expect( - clippy::missing_safety_doc, - reason = "TODO: https://github.com/linebender/fearless_simd/issues/40" -)] -impl Sse { - /// Create a SIMD token. - /// - /// # Safety - /// - /// The required CPU features must be available. 
- #[inline] - pub const unsafe fn new_unchecked() -> Self { - Self { _private: () } - } - - delegate! { arch: - fn _mm_add_ss(a: __m128, b: __m128) -> __m128; - fn _mm_add_ps(a: __m128, b: __m128) -> __m128; - fn _mm_sub_ss(a: __m128, b: __m128) -> __m128; - fn _mm_sub_ps(a: __m128, b: __m128) -> __m128; - fn _mm_mul_ss(a: __m128, b: __m128) -> __m128; - fn _mm_mul_ps(a: __m128, b: __m128) -> __m128; - fn _mm_div_ss(a: __m128, b: __m128) -> __m128; - fn _mm_div_ps(a: __m128, b: __m128) -> __m128; - fn _mm_sqrt_ss(a: __m128) -> __m128; - fn _mm_sqrt_ps(a: __m128) -> __m128; - fn _mm_rcp_ss(a: __m128) -> __m128; - fn _mm_rcp_ps(a: __m128) -> __m128; - fn _mm_rsqrt_ss(a: __m128) -> __m128; - fn _mm_rsqrt_ps(a: __m128) -> __m128; - fn _mm_min_ss(a: __m128, b: __m128) -> __m128; - fn _mm_min_ps(a: __m128, b: __m128) -> __m128; - fn _mm_max_ss(a: __m128, b: __m128) -> __m128; - fn _mm_max_ps(a: __m128, b: __m128) -> __m128; - fn _mm_and_ps(a: __m128, b: __m128) -> __m128; - fn _mm_andnot_ps(a: __m128, b: __m128) -> __m128; - fn _mm_or_ps(a: __m128, b: __m128) -> __m128; - fn _mm_xor_ps(a: __m128, b: __m128) -> __m128; - fn _mm_cmpeq_ss(a: __m128, b: __m128) -> __m128; - fn _mm_cmplt_ss(a: __m128, b: __m128) -> __m128; - fn _mm_cmple_ss(a: __m128, b: __m128) -> __m128; - fn _mm_cmpgt_ss(a: __m128, b: __m128) -> __m128; - fn _mm_cmpge_ss(a: __m128, b: __m128) -> __m128; - fn _mm_cmpneq_ss(a: __m128, b: __m128) -> __m128; - fn _mm_cmpnlt_ss(a: __m128, b: __m128) -> __m128; - fn _mm_cmpnle_ss(a: __m128, b: __m128) -> __m128; - fn _mm_cmpngt_ss(a: __m128, b: __m128) -> __m128; - fn _mm_cmpnge_ss(a: __m128, b: __m128) -> __m128; - fn _mm_cmpord_ss(a: __m128, b: __m128) -> __m128; - fn _mm_cmpunord_ss(a: __m128, b: __m128) -> __m128; - fn _mm_cmpeq_ps(a: __m128, b: __m128) -> __m128; - fn _mm_cmplt_ps(a: __m128, b: __m128) -> __m128; - fn _mm_cmple_ps(a: __m128, b: __m128) -> __m128; - fn _mm_cmpgt_ps(a: __m128, b: __m128) -> __m128; - fn _mm_cmpge_ps(a: __m128, b: __m128) -> 
__m128; - fn _mm_cmpneq_ps(a: __m128, b: __m128) -> __m128; - fn _mm_cmpnlt_ps(a: __m128, b: __m128) -> __m128; - fn _mm_cmpnle_ps(a: __m128, b: __m128) -> __m128; - fn _mm_cmpngt_ps(a: __m128, b: __m128) -> __m128; - fn _mm_cmpnge_ps(a: __m128, b: __m128) -> __m128; - fn _mm_cmpord_ps(a: __m128, b: __m128) -> __m128; - fn _mm_cmpunord_ps(a: __m128, b: __m128) -> __m128; - fn _mm_comieq_ss(a: __m128, b: __m128) -> i32; - fn _mm_comilt_ss(a: __m128, b: __m128) -> i32; - fn _mm_comile_ss(a: __m128, b: __m128) -> i32; - fn _mm_comigt_ss(a: __m128, b: __m128) -> i32; - fn _mm_comige_ss(a: __m128, b: __m128) -> i32; - fn _mm_comineq_ss(a: __m128, b: __m128) -> i32; - fn _mm_ucomieq_ss(a: __m128, b: __m128) -> i32; - fn _mm_ucomilt_ss(a: __m128, b: __m128) -> i32; - fn _mm_ucomile_ss(a: __m128, b: __m128) -> i32; - fn _mm_ucomigt_ss(a: __m128, b: __m128) -> i32; - fn _mm_ucomige_ss(a: __m128, b: __m128) -> i32; - fn _mm_ucomineq_ss(a: __m128, b: __m128) -> i32; - fn _mm_cvtss_si32(a: __m128) -> i32; - fn _mm_cvt_ss2si(a: __m128) -> i32; - fn _mm_cvttss_si32(a: __m128) -> i32; - fn _mm_cvtt_ss2si(a: __m128) -> i32; - fn _mm_cvtss_f32(a: __m128) -> f32; - fn _mm_cvtsi32_ss(a: __m128, b: i32) -> __m128; - fn _mm_cvt_si2ss(a: __m128, b: i32) -> __m128; - fn _mm_set_ss(a: f32) -> __m128; - fn _mm_set1_ps(a: f32) -> __m128; - fn _mm_set_ps1(a: f32) -> __m128; - fn _mm_set_ps(a: f32, b: f32, c: f32, d: f32) -> __m128; - fn _mm_setr_ps(a: f32, b: f32, c: f32, d: f32) -> __m128; - fn _mm_setzero_ps() -> __m128; - fn _mm_shuffle_ps(a: __m128, b: __m128) -> __m128; - fn _mm_unpackhi_ps(a: __m128, b: __m128) -> __m128; - fn _mm_unpacklo_ps(a: __m128, b: __m128) -> __m128; - fn _mm_movehl_ps(a: __m128, b: __m128) -> __m128; - fn _mm_movelh_ps(a: __m128, b: __m128) -> __m128; - fn _mm_movemask_ps(a: __m128) -> i32; - unsafe fn _mm_load_ss(p: *const f32) -> __m128; - unsafe fn _mm_load1_ps(p: *const f32) -> __m128; - unsafe fn _mm_load_ps1(p: *const f32) -> __m128; - unsafe fn 
_mm_load_ps(p: *const f32) -> __m128; - unsafe fn _mm_loadu_ps(p: *const f32) -> __m128; - unsafe fn _mm_loadr_ps(p: *const f32) -> __m128; - unsafe fn _mm_loadu_si64(mem_addr: *const u8) -> __m128i; - unsafe fn _mm_store_ss(p: *mut f32, a: __m128); - unsafe fn _mm_store1_ps(p: *mut f32, a: __m128); - unsafe fn _mm_store_ps1(p: *mut f32, a: __m128); - unsafe fn _mm_store_ps(p: *mut f32, a: __m128); - unsafe fn _mm_storeu_ps(p: *mut f32, a: __m128); - unsafe fn _mm_storer_ps(p: *mut f32, a: __m128); - fn _mm_move_ss(a: __m128, b: __m128) -> __m128; - fn _mm_sfence(); - #[expect(clippy::not_unsafe_ptr_arg_deref, reason="Prefetch has no preconditions, so is valid to accept a pointer.")] - fn _mm_prefetch(p: *const i8); - fn _mm_undefined_ps() -> __m128; - #[allow(non_snake_case)] - fn _MM_TRANSPOSE4_PS( - row0: &mut __m128, - row1: &mut __m128, - row2: &mut __m128, - row3: &mut __m128, - ); - unsafe fn _mm_stream_ps(mem_addr: *mut f32, a: __m128); - } -} diff --git a/fearless_simd/src/core_arch/x86/sse2.rs b/fearless_simd/src/core_arch/x86/sse2.rs deleted file mode 100644 index cf320ef8..00000000 --- a/fearless_simd/src/core_arch/x86/sse2.rs +++ /dev/null @@ -1,299 +0,0 @@ -// Copyright 2024 the Fearless_SIMD Authors -// SPDX-License-Identifier: Apache-2.0 OR MIT - -//! Access to SSE2 intrinsics. - -use crate::impl_macros::delegate; -#[cfg(target_arch = "x86")] -use core::arch::x86 as arch; -#[cfg(target_arch = "x86_64")] -use core::arch::x86_64 as arch; - -use arch::*; - -/// A token for SSE2 intrinsics on `x86` and `x86_64`. -#[derive(Clone, Copy, Debug)] -pub struct Sse2 { - _private: (), -} - -#[expect( - clippy::missing_safety_doc, - reason = "TODO: https://github.com/linebender/fearless_simd/issues/40" -)] -impl Sse2 { - /// Create a SIMD token. - /// - /// # Safety - /// - /// The required CPU features must be available. - #[inline] - pub const unsafe fn new_unchecked() -> Self { - Self { _private: () } - } - - delegate! 
{ arch: - fn _mm_pause(); - unsafe fn _mm_clflush(p: *const u8); - fn _mm_lfence(); - fn _mm_mfence(); - fn _mm_add_epi8(a: __m128i, b: __m128i) -> __m128i; - fn _mm_add_epi16(a: __m128i, b: __m128i) -> __m128i; - fn _mm_add_epi32(a: __m128i, b: __m128i) -> __m128i; - fn _mm_add_epi64(a: __m128i, b: __m128i) -> __m128i; - fn _mm_adds_epi8(a: __m128i, b: __m128i) -> __m128i; - fn _mm_adds_epi16(a: __m128i, b: __m128i) -> __m128i; - fn _mm_adds_epu8(a: __m128i, b: __m128i) -> __m128i; - fn _mm_adds_epu16(a: __m128i, b: __m128i) -> __m128i; - fn _mm_avg_epu8(a: __m128i, b: __m128i) -> __m128i; - fn _mm_avg_epu16(a: __m128i, b: __m128i) -> __m128i; - fn _mm_madd_epi16(a: __m128i, b: __m128i) -> __m128i; - fn _mm_max_epi16(a: __m128i, b: __m128i) -> __m128i; - fn _mm_max_epu8(a: __m128i, b: __m128i) -> __m128i; - fn _mm_min_epi16(a: __m128i, b: __m128i) -> __m128i; - fn _mm_min_epu8(a: __m128i, b: __m128i) -> __m128i; - fn _mm_mulhi_epi16(a: __m128i, b: __m128i) -> __m128i; - fn _mm_mulhi_epu16(a: __m128i, b: __m128i) -> __m128i; - fn _mm_mullo_epi16(a: __m128i, b: __m128i) -> __m128i; - fn _mm_mul_epu32(a: __m128i, b: __m128i) -> __m128i; - fn _mm_sad_epu8(a: __m128i, b: __m128i) -> __m128i; - fn _mm_sub_epi8(a: __m128i, b: __m128i) -> __m128i; - fn _mm_sub_epi16(a: __m128i, b: __m128i) -> __m128i; - fn _mm_sub_epi32(a: __m128i, b: __m128i) -> __m128i; - fn _mm_sub_epi64(a: __m128i, b: __m128i) -> __m128i; - fn _mm_subs_epi8(a: __m128i, b: __m128i) -> __m128i; - fn _mm_subs_epi16(a: __m128i, b: __m128i) -> __m128i; - fn _mm_subs_epu8(a: __m128i, b: __m128i) -> __m128i; - fn _mm_subs_epu16(a: __m128i, b: __m128i) -> __m128i; - fn _mm_slli_si128(a: __m128i) -> __m128i; - fn _mm_bslli_si128(a: __m128i) -> __m128i; - fn _mm_bsrli_si128(a: __m128i) -> __m128i; - fn _mm_slli_epi16(a: __m128i) -> __m128i; - fn _mm_sll_epi16(a: __m128i, count: __m128i) -> __m128i; - fn _mm_slli_epi32(a: __m128i) -> __m128i; - fn _mm_sll_epi32(a: __m128i, count: __m128i) -> __m128i; - fn 
_mm_slli_epi64(a: __m128i) -> __m128i; - fn _mm_sll_epi64(a: __m128i, count: __m128i) -> __m128i; - fn _mm_srai_epi16(a: __m128i) -> __m128i; - fn _mm_sra_epi16(a: __m128i, count: __m128i) -> __m128i; - fn _mm_srai_epi32(a: __m128i) -> __m128i; - fn _mm_sra_epi32(a: __m128i, count: __m128i) -> __m128i; - fn _mm_srli_si128(a: __m128i) -> __m128i; - fn _mm_srli_epi16(a: __m128i) -> __m128i; - fn _mm_srl_epi16(a: __m128i, count: __m128i) -> __m128i; - fn _mm_srli_epi32(a: __m128i) -> __m128i; - fn _mm_srl_epi32(a: __m128i, count: __m128i) -> __m128i; - fn _mm_srli_epi64(a: __m128i) -> __m128i; - fn _mm_srl_epi64(a: __m128i, count: __m128i) -> __m128i; - fn _mm_and_si128(a: __m128i, b: __m128i) -> __m128i; - fn _mm_andnot_si128(a: __m128i, b: __m128i) -> __m128i; - fn _mm_or_si128(a: __m128i, b: __m128i) -> __m128i; - fn _mm_xor_si128(a: __m128i, b: __m128i) -> __m128i; - fn _mm_cmpeq_epi8(a: __m128i, b: __m128i) -> __m128i; - fn _mm_cmpeq_epi16(a: __m128i, b: __m128i) -> __m128i; - fn _mm_cmpeq_epi32(a: __m128i, b: __m128i) -> __m128i; - fn _mm_cmpgt_epi8(a: __m128i, b: __m128i) -> __m128i; - fn _mm_cmpgt_epi16(a: __m128i, b: __m128i) -> __m128i; - fn _mm_cmpgt_epi32(a: __m128i, b: __m128i) -> __m128i; - fn _mm_cmplt_epi8(a: __m128i, b: __m128i) -> __m128i; - fn _mm_cmplt_epi16(a: __m128i, b: __m128i) -> __m128i; - fn _mm_cmplt_epi32(a: __m128i, b: __m128i) -> __m128i; - fn _mm_cvtepi32_pd(a: __m128i) -> __m128d; - fn _mm_cvtsi32_sd(a: __m128d, b: i32) -> __m128d; - fn _mm_cvtepi32_ps(a: __m128i) -> __m128; - fn _mm_cvtps_epi32(a: __m128) -> __m128i; - fn _mm_cvtsi32_si128(a: i32) -> __m128i; - fn _mm_cvtsi128_si32(a: __m128i) -> i32; - fn _mm_set_epi64x(e1: i64, e0: i64) -> __m128i; - fn _mm_set_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i; - fn _mm_set_epi16( - e7: i16, - e6: i16, - e5: i16, - e4: i16, - e3: i16, - e2: i16, - e1: i16, - e0: i16, - ) -> __m128i; - fn _mm_set_epi8( - e15: i8, - e14: i8, - e13: i8, - e12: i8, - e11: i8, - e10: i8, - e9: i8, - 
e8: i8, - e7: i8, - e6: i8, - e5: i8, - e4: i8, - e3: i8, - e2: i8, - e1: i8, - e0: i8, - ) -> __m128i; - fn _mm_set1_epi64x(a: i64) -> __m128i; - fn _mm_set1_epi32(a: i32) -> __m128i; - fn _mm_set1_epi16(a: i16) -> __m128i; - fn _mm_set1_epi8(a: i8) -> __m128i; - fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i; - fn _mm_setr_epi16( - e7: i16, - e6: i16, - e5: i16, - e4: i16, - e3: i16, - e2: i16, - e1: i16, - e0: i16, - ) -> __m128i; - fn _mm_setr_epi8( - e15: i8, - e14: i8, - e13: i8, - e12: i8, - e11: i8, - e10: i8, - e9: i8, - e8: i8, - e7: i8, - e6: i8, - e5: i8, - e4: i8, - e3: i8, - e2: i8, - e1: i8, - e0: i8, - ) -> __m128i; - fn _mm_setzero_si128() -> __m128i; - unsafe fn _mm_loadl_epi64(mem_addr: *const __m128i) -> __m128i; - unsafe fn _mm_load_si128(mem_addr: *const __m128i) -> __m128i; - unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i; - unsafe fn _mm_maskmoveu_si128(a: __m128i, mask: __m128i, mem_addr: *mut i8); - unsafe fn _mm_store_si128(mem_addr: *mut __m128i, a: __m128i); - unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i); - unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i); - unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i); - unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32); - fn _mm_move_epi64(a: __m128i) -> __m128i; - fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i; - fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i; - fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i; - fn _mm_extract_epi16(a: __m128i) -> i32; - fn _mm_insert_epi16(a: __m128i, i: i32) -> __m128i; - fn _mm_movemask_epi8(a: __m128i) -> i32; - fn _mm_shuffle_epi32(a: __m128i) -> __m128i; - fn _mm_shufflehi_epi16(a: __m128i) -> __m128i; - fn _mm_shufflelo_epi16(a: __m128i) -> __m128i; - fn _mm_unpackhi_epi8(a: __m128i, b: __m128i) -> __m128i; - fn _mm_unpackhi_epi16(a: __m128i, b: __m128i) -> __m128i; - fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i; - fn _mm_unpackhi_epi64(a: 
__m128i, b: __m128i) -> __m128i; - fn _mm_unpacklo_epi8(a: __m128i, b: __m128i) -> __m128i; - fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i; - fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i; - fn _mm_unpacklo_epi64(a: __m128i, b: __m128i) -> __m128i; - fn _mm_add_sd(a: __m128d, b: __m128d) -> __m128d; - fn _mm_add_pd(a: __m128d, b: __m128d) -> __m128d; - fn _mm_div_sd(a: __m128d, b: __m128d) -> __m128d; - fn _mm_div_pd(a: __m128d, b: __m128d) -> __m128d; - fn _mm_max_sd(a: __m128d, b: __m128d) -> __m128d; - fn _mm_max_pd(a: __m128d, b: __m128d) -> __m128d; - fn _mm_min_sd(a: __m128d, b: __m128d) -> __m128d; - fn _mm_min_pd(a: __m128d, b: __m128d) -> __m128d; - fn _mm_mul_sd(a: __m128d, b: __m128d) -> __m128d; - fn _mm_mul_pd(a: __m128d, b: __m128d) -> __m128d; - fn _mm_sqrt_sd(a: __m128d, b: __m128d) -> __m128d; - fn _mm_sqrt_pd(a: __m128d) -> __m128d; - fn _mm_sub_sd(a: __m128d, b: __m128d) -> __m128d; - fn _mm_sub_pd(a: __m128d, b: __m128d) -> __m128d; - fn _mm_and_pd(a: __m128d, b: __m128d) -> __m128d; - fn _mm_andnot_pd(a: __m128d, b: __m128d) -> __m128d; - fn _mm_or_pd(a: __m128d, b: __m128d) -> __m128d; - fn _mm_xor_pd(a: __m128d, b: __m128d) -> __m128d; - fn _mm_cmpeq_sd(a: __m128d, b: __m128d) -> __m128d; - fn _mm_cmplt_sd(a: __m128d, b: __m128d) -> __m128d; - fn _mm_cmple_sd(a: __m128d, b: __m128d) -> __m128d; - fn _mm_cmpgt_sd(a: __m128d, b: __m128d) -> __m128d; - fn _mm_cmpge_sd(a: __m128d, b: __m128d) -> __m128d; - fn _mm_cmpord_sd(a: __m128d, b: __m128d) -> __m128d; - fn _mm_cmpunord_sd(a: __m128d, b: __m128d) -> __m128d; - fn _mm_cmpneq_sd(a: __m128d, b: __m128d) -> __m128d; - fn _mm_cmpnlt_sd(a: __m128d, b: __m128d) -> __m128d; - fn _mm_cmpnle_sd(a: __m128d, b: __m128d) -> __m128d; - fn _mm_cmpngt_sd(a: __m128d, b: __m128d) -> __m128d; - fn _mm_cmpnge_sd(a: __m128d, b: __m128d) -> __m128d; - fn _mm_cmpeq_pd(a: __m128d, b: __m128d) -> __m128d; - fn _mm_cmplt_pd(a: __m128d, b: __m128d) -> __m128d; - fn _mm_cmple_pd(a: __m128d, b: 
__m128d) -> __m128d; - fn _mm_cmpgt_pd(a: __m128d, b: __m128d) -> __m128d; - fn _mm_cmpge_pd(a: __m128d, b: __m128d) -> __m128d; - fn _mm_cmpord_pd(a: __m128d, b: __m128d) -> __m128d; - fn _mm_cmpunord_pd(a: __m128d, b: __m128d) -> __m128d; - fn _mm_cmpneq_pd(a: __m128d, b: __m128d) -> __m128d; - fn _mm_cmpnlt_pd(a: __m128d, b: __m128d) -> __m128d; - fn _mm_cmpnle_pd(a: __m128d, b: __m128d) -> __m128d; - fn _mm_cmpngt_pd(a: __m128d, b: __m128d) -> __m128d; - fn _mm_cmpnge_pd(a: __m128d, b: __m128d) -> __m128d; - fn _mm_comieq_sd(a: __m128d, b: __m128d) -> i32; - fn _mm_comilt_sd(a: __m128d, b: __m128d) -> i32; - fn _mm_comile_sd(a: __m128d, b: __m128d) -> i32; - fn _mm_comigt_sd(a: __m128d, b: __m128d) -> i32; - fn _mm_comige_sd(a: __m128d, b: __m128d) -> i32; - fn _mm_comineq_sd(a: __m128d, b: __m128d) -> i32; - fn _mm_ucomieq_sd(a: __m128d, b: __m128d) -> i32; - fn _mm_ucomilt_sd(a: __m128d, b: __m128d) -> i32; - fn _mm_ucomile_sd(a: __m128d, b: __m128d) -> i32; - fn _mm_ucomigt_sd(a: __m128d, b: __m128d) -> i32; - fn _mm_ucomige_sd(a: __m128d, b: __m128d) -> i32; - fn _mm_ucomineq_sd(a: __m128d, b: __m128d) -> i32; - fn _mm_cvtpd_ps(a: __m128d) -> __m128; - fn _mm_cvtps_pd(a: __m128) -> __m128d; - fn _mm_cvtpd_epi32(a: __m128d) -> __m128i; - fn _mm_cvtsd_si32(a: __m128d) -> i32; - fn _mm_cvtsd_ss(a: __m128, b: __m128d) -> __m128; - fn _mm_cvtsd_f64(a: __m128d) -> f64; - fn _mm_cvtss_sd(a: __m128d, b: __m128) -> __m128d; - fn _mm_cvttpd_epi32(a: __m128d) -> __m128i; - fn _mm_cvttsd_si32(a: __m128d) -> i32; - fn _mm_cvttps_epi32(a: __m128) -> __m128i; - fn _mm_set_sd(a: f64) -> __m128d; - fn _mm_set1_pd(a: f64) -> __m128d; - fn _mm_set_pd1(a: f64) -> __m128d; - fn _mm_set_pd(a: f64, b: f64) -> __m128d; - fn _mm_setr_pd(a: f64, b: f64) -> __m128d; - fn _mm_setzero_pd() -> __m128d; - fn _mm_movemask_pd(a: __m128d) -> i32; - unsafe fn _mm_load_pd(mem_addr: *const f64) -> __m128d; - unsafe fn _mm_load_sd(mem_addr: *const f64) -> __m128d; - unsafe fn _mm_loadh_pd(a: 
__m128d, mem_addr: *const f64) -> __m128d; - unsafe fn _mm_loadl_pd(a: __m128d, mem_addr: *const f64) -> __m128d; - unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: __m128d); - unsafe fn _mm_store_sd(mem_addr: *mut f64, a: __m128d); - unsafe fn _mm_store_pd(mem_addr: *mut f64, a: __m128d); - unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: __m128d); - unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: __m128d); - unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: __m128d); - unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: __m128d); - unsafe fn _mm_storeh_pd(mem_addr: *mut f64, a: __m128d); - unsafe fn _mm_storel_pd(mem_addr: *mut f64, a: __m128d); - unsafe fn _mm_load1_pd(mem_addr: *const f64) -> __m128d; - unsafe fn _mm_load_pd1(mem_addr: *const f64) -> __m128d; - unsafe fn _mm_loadr_pd(mem_addr: *const f64) -> __m128d; - unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> __m128d; - fn _mm_shuffle_pd(a: __m128d, b: __m128d) -> __m128d; - fn _mm_move_sd(a: __m128d, b: __m128d) -> __m128d; - fn _mm_castpd_ps(a: __m128d) -> __m128; - fn _mm_castpd_si128(a: __m128d) -> __m128i; - fn _mm_castps_pd(a: __m128) -> __m128d; - fn _mm_castps_si128(a: __m128) -> __m128i; - fn _mm_castsi128_pd(a: __m128i) -> __m128d; - fn _mm_castsi128_ps(a: __m128i) -> __m128; - fn _mm_undefined_pd() -> __m128d; - fn _mm_undefined_si128() -> __m128i; - fn _mm_unpackhi_pd(a: __m128d, b: __m128d) -> __m128d; - fn _mm_unpacklo_pd(a: __m128d, b: __m128d) -> __m128d; - } -} diff --git a/fearless_simd/src/core_arch/x86/sse3.rs b/fearless_simd/src/core_arch/x86/sse3.rs deleted file mode 100644 index 45ec21ae..00000000 --- a/fearless_simd/src/core_arch/x86/sse3.rs +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright 2024 the Fearless_SIMD Authors -// SPDX-License-Identifier: Apache-2.0 OR MIT - -//! Access to SSE3 intrinsics. 
- -use crate::impl_macros::delegate; -#[cfg(target_arch = "x86")] -use core::arch::x86 as arch; -#[cfg(target_arch = "x86_64")] -use core::arch::x86_64 as arch; - -use arch::*; - -/// A token for SSE3 intrinsics on `x86` and `x86_64`. -#[derive(Clone, Copy, Debug)] -pub struct Sse3 { - _private: (), -} - -#[expect( - clippy::missing_safety_doc, - reason = "TODO: https://github.com/linebender/fearless_simd/issues/40" -)] -impl Sse3 { - /// Create a SIMD token. - /// - /// # Safety - /// - /// The required CPU features must be available. - #[inline] - pub const unsafe fn new_unchecked() -> Self { - Self { _private: () } - } - - delegate! { arch: - fn _mm_addsub_ps(a: __m128, b: __m128) -> __m128; - fn _mm_addsub_pd(a: __m128d, b: __m128d) -> __m128d; - fn _mm_hadd_pd(a: __m128d, b: __m128d) -> __m128d; - fn _mm_hadd_ps(a: __m128, b: __m128) -> __m128; - fn _mm_hsub_pd(a: __m128d, b: __m128d) -> __m128d; - fn _mm_hsub_ps(a: __m128, b: __m128) -> __m128; - unsafe fn _mm_lddqu_si128(mem_addr: *const __m128i) -> __m128i; - fn _mm_movedup_pd(a: __m128d) -> __m128d; - unsafe fn _mm_loaddup_pd(mem_addr: *const f64) -> __m128d; - fn _mm_movehdup_ps(a: __m128) -> __m128; - fn _mm_moveldup_ps(a: __m128) -> __m128; - } -} diff --git a/fearless_simd/src/core_arch/x86/sse4_1.rs b/fearless_simd/src/core_arch/x86/sse4_1.rs deleted file mode 100644 index 814e8d29..00000000 --- a/fearless_simd/src/core_arch/x86/sse4_1.rs +++ /dev/null @@ -1,91 +0,0 @@ -// Copyright 2024 the Fearless_SIMD Authors -// SPDX-License-Identifier: Apache-2.0 OR MIT - -//! Access to SSE4.1 intrinsics. - -use crate::impl_macros::delegate; -#[cfg(target_arch = "x86")] -use core::arch::x86 as arch; -#[cfg(target_arch = "x86_64")] -use core::arch::x86_64 as arch; - -use arch::*; - -/// A token for SSE4.1 intrinsics on `x86` and `x86_64`. -#[derive(Clone, Copy, Debug)] -pub struct Sse4_1 { - _private: (), -} - -impl Sse4_1 { - /// Create a SIMD token. 
- /// - /// # Safety - /// - /// The required CPU features must be available. - #[inline] - pub const unsafe fn new_unchecked() -> Self { - Self { _private: () } - } - - delegate! { arch: - fn _mm_blendv_epi8(a: __m128i, b: __m128i, mask: __m128i) -> __m128i; - fn _mm_blend_epi16(a: __m128i, b: __m128i) -> __m128i; - fn _mm_blendv_pd(a: __m128d, b: __m128d, mask: __m128d) -> __m128d; - fn _mm_blendv_ps(a: __m128, b: __m128, mask: __m128) -> __m128; - fn _mm_blend_pd(a: __m128d, b: __m128d) -> __m128d; - fn _mm_blend_ps(a: __m128, b: __m128) -> __m128; - fn _mm_extract_ps(a: __m128) -> i32; - fn _mm_extract_epi8(a: __m128i) -> i32; - fn _mm_extract_epi32(a: __m128i) -> i32; - fn _mm_insert_ps(a: __m128, b: __m128) -> __m128; - fn _mm_insert_epi8(a: __m128i, i: i32) -> __m128i; - fn _mm_insert_epi32(a: __m128i, i: i32) -> __m128i; - fn _mm_max_epi8(a: __m128i, b: __m128i) -> __m128i; - fn _mm_max_epu16(a: __m128i, b: __m128i) -> __m128i; - fn _mm_max_epi32(a: __m128i, b: __m128i) -> __m128i; - fn _mm_max_epu32(a: __m128i, b: __m128i) -> __m128i; - fn _mm_min_epi8(a: __m128i, b: __m128i) -> __m128i; - fn _mm_min_epu16(a: __m128i, b: __m128i) -> __m128i; - fn _mm_min_epi32(a: __m128i, b: __m128i) -> __m128i; - fn _mm_min_epu32(a: __m128i, b: __m128i) -> __m128i; - fn _mm_packus_epi32(a: __m128i, b: __m128i) -> __m128i; - fn _mm_cmpeq_epi64(a: __m128i, b: __m128i) -> __m128i; - fn _mm_cvtepi8_epi16(a: __m128i) -> __m128i; - fn _mm_cvtepi8_epi32(a: __m128i) -> __m128i; - fn _mm_cvtepi8_epi64(a: __m128i) -> __m128i; - fn _mm_cvtepi16_epi32(a: __m128i) -> __m128i; - fn _mm_cvtepi16_epi64(a: __m128i) -> __m128i; - fn _mm_cvtepi32_epi64(a: __m128i) -> __m128i; - fn _mm_cvtepu8_epi16(a: __m128i) -> __m128i; - fn _mm_cvtepu8_epi32(a: __m128i) -> __m128i; - fn _mm_cvtepu8_epi64(a: __m128i) -> __m128i; - fn _mm_cvtepu16_epi32(a: __m128i) -> __m128i; - fn _mm_cvtepu16_epi64(a: __m128i) -> __m128i; - fn _mm_cvtepu32_epi64(a: __m128i) -> __m128i; - fn _mm_dp_pd(a: __m128d, b: 
__m128d) -> __m128d; - fn _mm_dp_ps(a: __m128, b: __m128) -> __m128; - fn _mm_floor_pd(a: __m128d) -> __m128d; - fn _mm_floor_ps(a: __m128) -> __m128; - fn _mm_floor_sd(a: __m128d, b: __m128d) -> __m128d; - fn _mm_floor_ss(a: __m128, b: __m128) -> __m128; - fn _mm_ceil_pd(a: __m128d) -> __m128d; - fn _mm_ceil_ps(a: __m128) -> __m128; - fn _mm_ceil_sd(a: __m128d, b: __m128d) -> __m128d; - fn _mm_ceil_ss(a: __m128, b: __m128) -> __m128; - fn _mm_round_pd(a: __m128d) -> __m128d; - fn _mm_round_ps(a: __m128) -> __m128; - fn _mm_round_sd(a: __m128d, b: __m128d) -> __m128d; - fn _mm_round_ss(a: __m128, b: __m128) -> __m128; - fn _mm_minpos_epu16(a: __m128i) -> __m128i; - fn _mm_mul_epi32(a: __m128i, b: __m128i) -> __m128i; - fn _mm_mullo_epi32(a: __m128i, b: __m128i) -> __m128i; - fn _mm_mpsadbw_epu8(a: __m128i, b: __m128i) -> __m128i; - fn _mm_testz_si128(a: __m128i, mask: __m128i) -> i32; - fn _mm_testc_si128(a: __m128i, mask: __m128i) -> i32; - fn _mm_testnzc_si128(a: __m128i, mask: __m128i) -> i32; - fn _mm_test_all_zeros(a: __m128i, mask: __m128i) -> i32; - fn _mm_test_all_ones(a: __m128i) -> i32; - fn _mm_test_mix_ones_zeros(a: __m128i, mask: __m128i) -> i32; - } -} diff --git a/fearless_simd/src/core_arch/x86/sse4_2.rs b/fearless_simd/src/core_arch/x86/sse4_2.rs deleted file mode 100644 index 16f6d46a..00000000 --- a/fearless_simd/src/core_arch/x86/sse4_2.rs +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright 2024 the Fearless_SIMD Authors -// SPDX-License-Identifier: Apache-2.0 OR MIT - -//! Access to SSE4.2 intrinsics. - -use crate::impl_macros::delegate; -#[cfg(target_arch = "x86")] -use core::arch::x86 as arch; -#[cfg(target_arch = "x86_64")] -use core::arch::x86_64 as arch; - -use arch::*; - -/// A token for SSE4.2 intrinsics on `x86` and `x86_64`. -#[derive(Clone, Copy, Debug)] -pub struct Sse4_2 { - _private: (), -} - -impl Sse4_2 { - /// Create a SIMD token. - /// - /// # Safety - /// - /// The required CPU features must be available. 
- #[inline] - pub const unsafe fn new_unchecked() -> Self { - Self { _private: () } - } - - delegate! { arch: - fn _mm_cmpistrm(a: __m128i, b: __m128i) -> __m128i; - fn _mm_cmpistri(a: __m128i, b: __m128i) -> i32; - fn _mm_cmpistrz(a: __m128i, b: __m128i) -> i32; - fn _mm_cmpistrc(a: __m128i, b: __m128i) -> i32; - fn _mm_cmpistrs(a: __m128i, b: __m128i) -> i32; - fn _mm_cmpistro(a: __m128i, b: __m128i) -> i32; - fn _mm_cmpistra(a: __m128i, b: __m128i) -> i32; - fn _mm_cmpestrm(a: __m128i, la: i32, b: __m128i, lb: i32) -> __m128i; - fn _mm_cmpestri(a: __m128i, la: i32, b: __m128i, lb: i32) -> i32; - fn _mm_cmpestrz(a: __m128i, la: i32, b: __m128i, lb: i32) -> i32; - fn _mm_cmpestrc(a: __m128i, la: i32, b: __m128i, lb: i32) -> i32; - fn _mm_cmpestrs(a: __m128i, la: i32, b: __m128i, lb: i32) -> i32; - fn _mm_cmpestro(a: __m128i, la: i32, b: __m128i, lb: i32) -> i32; - fn _mm_cmpestra(a: __m128i, la: i32, b: __m128i, lb: i32) -> i32; - fn _mm_crc32_u8(crc: u32, v: u8) -> u32; - fn _mm_crc32_u16(crc: u32, v: u16) -> u32; - fn _mm_crc32_u32(crc: u32, v: u32) -> u32; - fn _mm_cmpgt_epi64(a: __m128i, b: __m128i) -> __m128i; - } -} diff --git a/fearless_simd/src/core_arch/x86/ssse3.rs b/fearless_simd/src/core_arch/x86/ssse3.rs deleted file mode 100644 index 3314acc8..00000000 --- a/fearless_simd/src/core_arch/x86/ssse3.rs +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright 2024 the Fearless_SIMD Authors -// SPDX-License-Identifier: Apache-2.0 OR MIT - -//! Access to SSSE3 intrinsics. - -use crate::impl_macros::delegate; -#[cfg(target_arch = "x86")] -use core::arch::x86 as arch; -#[cfg(target_arch = "x86_64")] -use core::arch::x86_64 as arch; - -use arch::*; - -/// A token for SSSE3 intrinsics on `x86` and `x86_64`. -#[derive(Clone, Copy, Debug)] -pub struct Ssse3 { - _private: (), -} - -impl Ssse3 { - /// Create a SIMD token. - /// - /// # Safety - /// - /// The required CPU features must be available. 
- #[inline] - pub const unsafe fn new_unchecked() -> Self { - Self { _private: () } - } - - delegate! { arch: - fn _mm_abs_epi8(a: __m128i) -> __m128i; - fn _mm_abs_epi16(a: __m128i) -> __m128i; - fn _mm_abs_epi32(a: __m128i) -> __m128i; - fn _mm_shuffle_epi8(a: __m128i, b: __m128i) -> __m128i; - fn _mm_alignr_epi8(a: __m128i, b: __m128i) -> __m128i; - fn _mm_hadd_epi16(a: __m128i, b: __m128i) -> __m128i; - fn _mm_hadds_epi16(a: __m128i, b: __m128i) -> __m128i; - fn _mm_hadd_epi32(a: __m128i, b: __m128i) -> __m128i; - fn _mm_hsub_epi16(a: __m128i, b: __m128i) -> __m128i; - fn _mm_hsubs_epi16(a: __m128i, b: __m128i) -> __m128i; - fn _mm_hsub_epi32(a: __m128i, b: __m128i) -> __m128i; - fn _mm_maddubs_epi16(a: __m128i, b: __m128i) -> __m128i; - fn _mm_mulhrs_epi16(a: __m128i, b: __m128i) -> __m128i; - fn _mm_sign_epi8(a: __m128i, b: __m128i) -> __m128i; - fn _mm_sign_epi16(a: __m128i, b: __m128i) -> __m128i; - fn _mm_sign_epi32(a: __m128i, b: __m128i) -> __m128i; - } -} diff --git a/fearless_simd/src/generated/avx2.rs b/fearless_simd/src/generated/avx2.rs index 3d607018..f6169489 100644 --- a/fearless_simd/src/generated/avx2.rs +++ b/fearless_simd/src/generated/avx2.rs @@ -14,10 +14,10 @@ use crate::{ use core::arch::x86::*; #[cfg(target_arch = "x86_64")] use core::arch::x86_64::*; -#[doc = "The SIMD token for the x86-64-v3 level."] +#[doc = "A token for AVX2 intrinsics on `x86` and `x86_64`, representing the x86-64-v3 level."] #[derive(Clone, Copy, Debug)] pub struct Avx2 { - pub avx2: crate::core_arch::x86::Avx2, + _private: (), } impl Avx2 { #[doc = r" Create a SIMD token."] @@ -29,9 +29,7 @@ impl Avx2 { #[doc = r" be available."] #[inline] pub const unsafe fn new_unchecked() -> Self { - Self { - avx2: unsafe { crate::core_arch::x86::Avx2::new_unchecked() }, - } + Self { _private: () } } } impl Seal for Avx2 {} diff --git a/fearless_simd/src/generated/fallback.rs b/fearless_simd/src/generated/fallback.rs index 253ae7ac..2449b569 100644 --- 
a/fearless_simd/src/generated/fallback.rs +++ b/fearless_simd/src/generated/fallback.rs @@ -74,17 +74,15 @@ impl FloatExt for f64 { libm::trunc(self) } } -#[doc = "The SIMD token for the \"fallback\" level."] +#[doc = "A token for scalar fallback SIMD, representing the \"fallback\" level."] #[derive(Clone, Copy, Debug)] pub struct Fallback { - pub fallback: crate::core_arch::fallback::Fallback, + _private: (), } impl Fallback { #[inline] pub const fn new() -> Self { - Self { - fallback: crate::core_arch::fallback::Fallback::new(), - } + Self { _private: () } } } impl Seal for Fallback {} diff --git a/fearless_simd/src/generated/neon.rs b/fearless_simd/src/generated/neon.rs index f8a2ded9..d6d38db4 100644 --- a/fearless_simd/src/generated/neon.rs +++ b/fearless_simd/src/generated/neon.rs @@ -11,17 +11,15 @@ use crate::{ u32x4, u32x8, u32x16, }; use core::arch::aarch64::*; -#[doc = "The SIMD token for the \"neon\" level."] +#[doc = "A token for Neon intrinsics on aarch64, representing the \"neon\" level."] #[derive(Clone, Copy, Debug)] pub struct Neon { - pub neon: crate::core_arch::aarch64::Neon, + _private: (), } impl Neon { #[inline] pub const unsafe fn new_unchecked() -> Self { - Neon { - neon: unsafe { crate::core_arch::aarch64::Neon::new_unchecked() }, - } + Neon { _private: () } } } impl Seal for Neon {} diff --git a/fearless_simd/src/generated/sse4_2.rs b/fearless_simd/src/generated/sse4_2.rs index ab35d94f..98dcf2fc 100644 --- a/fearless_simd/src/generated/sse4_2.rs +++ b/fearless_simd/src/generated/sse4_2.rs @@ -14,10 +14,10 @@ use crate::{ use core::arch::x86::*; #[cfg(target_arch = "x86_64")] use core::arch::x86_64::*; -#[doc = "The SIMD token for the x86-64-v2 level."] +#[doc = "A token for SSE4.2 intrinsics on `x86` and `x86_64`, representing the x86-64-v2 level."] #[derive(Clone, Copy, Debug)] pub struct Sse4_2 { - pub sse4_2: crate::core_arch::x86::Sse4_2, + _private: (), } impl Sse4_2 { #[doc = r" Create a SIMD token."] @@ -28,9 +28,7 @@ impl Sse4_2 
{ #[doc = r" be available."] #[inline] pub const unsafe fn new_unchecked() -> Self { - Sse4_2 { - sse4_2: unsafe { crate::core_arch::x86::Sse4_2::new_unchecked() }, - } + Sse4_2 { _private: () } } } impl Seal for Sse4_2 {} diff --git a/fearless_simd/src/generated/wasm.rs b/fearless_simd/src/generated/wasm.rs index 74255c06..ce07f344 100644 --- a/fearless_simd/src/generated/wasm.rs +++ b/fearless_simd/src/generated/wasm.rs @@ -11,17 +11,15 @@ use crate::{ u32x4, u32x8, u32x16, }; use core::arch::wasm32::*; -#[doc = "The SIMD token for the \"wasm128\" level."] +#[doc = "A token for WASM SIMD128, representing the \"wasm128\" level."] #[derive(Clone, Copy, Debug)] pub struct WasmSimd128 { - pub wasmsimd128: crate::core_arch::wasm32::WasmSimd128, + _private: (), } impl WasmSimd128 { #[inline] pub const fn new_unchecked() -> Self { - Self { - wasmsimd128: crate::core_arch::wasm32::WasmSimd128::new(), - } + Self { _private: () } } } impl Seal for WasmSimd128 {} diff --git a/fearless_simd/src/impl_macros.rs b/fearless_simd/src/impl_macros.rs deleted file mode 100644 index d3940df6..00000000 --- a/fearless_simd/src/impl_macros.rs +++ /dev/null @@ -1,32 +0,0 @@ -// Copyright 2024 the Fearless_SIMD Authors -// SPDX-License-Identifier: Apache-2.0 OR MIT - -//! Macros used by implementations - -#![allow( - unused_macros, - unused_imports, - reason = "Not all macros will be used by all implementations" -)] - -// Adapted from similar macro in pulp -macro_rules! delegate { - ( $prefix:path : $( - $(#[$attr: meta])* - $(unsafe $($placeholder: lifetime)?)? - fn $func: ident $(<$(const $generic: ident: $generic_ty: ty),* $(,)?>)?( - $($arg: ident: $ty: ty),* $(,)? - ) $(-> $ret: ty)?; - )*) => { - $( - #[doc=concat!("See [`", stringify!($prefix), "::", stringify!($func), "`].")] - $(#[$attr])* - #[inline(always)] - pub $(unsafe $($placeholder)?)? - fn $func $(<$(const $generic: $generic_ty),*>)?(self, $($arg: $ty),*) $(-> $ret)? 
{ - unsafe { $func $(::<$($generic,)*>)?($($arg,)*) } - } - )* - }; -} -pub(crate) use delegate; diff --git a/fearless_simd/src/lib.rs b/fearless_simd/src/lib.rs index 145df9f1..a8c44d00 100644 --- a/fearless_simd/src/lib.rs +++ b/fearless_simd/src/lib.rs @@ -113,8 +113,6 @@ //! - `std` (enabled by default): Get floating point functions from the standard library (likely using your target's libc). //! Also allows using [`Level::new`] on all platforms, to detect which target features are enabled. //! - `libm`: Use floating point implementations from [libm]. -//! - `safe_wrappers`: Include safe wrappers for (some) target feature specific intrinsics, -//! beyond the basic SIMD operations abstracted on all platforms. //! - `force_support_fallback`: Force scalar fallback, to be supported, even if your compilation target has a better baseline. //! //! At least one of `std` and `libm` is required; `std` overrides `libm`. @@ -144,9 +142,6 @@ compile_error!("fearless_simd requires either the `std` or `libm` feature"); #[cfg(all(feature = "std", feature = "libm"))] use libm as _; -pub mod core_arch; -mod impl_macros; - mod generated; mod kernel_macros; mod macros; @@ -417,8 +412,6 @@ impl Level { /// because if Fearless SIMD gets support for an instruction set which is a superset of Neon, /// this method will return a value even if that "better" instruction set is available. /// - /// This can be used in combination with the `safe_wrappers` feature to gain checked access to - /// the level-specific SIMD capabilities. #[cfg(target_arch = "aarch64")] #[inline] pub fn as_neon(self) -> Option<Neon> { @@ -438,8 +431,6 @@ impl Level { /// because if Fearless SIMD gets support for an instruction set which is a superset of SIMD 128, /// this method will return a value even if that "better" instruction set is available. /// - /// This can be used in combination with the `safe_wrappers` feature to gain checked access to - /// the level-specific SIMD capabilities. 
#[cfg(all(target_arch = "wasm32", target_feature = "simd128"))] #[inline] pub fn as_wasm_simd128(self) -> Option<WasmSimd128> { @@ -459,8 +450,6 @@ impl Level { /// because if Fearless SIMD gets support for an instruction set which is a superset of SSE4.2, /// this method will return a value even if that "better" instruction set is available. /// - /// This can be used in combination with the `safe_wrappers` feature to gain checked access to - /// the level-specific SIMD capabilities. #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] #[inline] pub fn as_sse4_2(self) -> Option<Sse4_2> { @@ -496,8 +485,6 @@ impl Level { /// because if Fearless SIMD gets support for an instruction set which is a superset of AVX2, /// this method will return a value even if that "better" instruction set is available. /// - /// This can be used in combination with the `safe_wrappers` feature to gain checked access to - /// the level-specific SIMD capabilities. #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] #[inline] pub fn as_avx2(self) -> Option<Avx2> { diff --git a/fearless_simd_gen/src/level.rs b/fearless_simd_gen/src/level.rs index 0bfe0623..c4800698 100644 --- a/fearless_simd_gen/src/level.rs +++ b/fearless_simd_gen/src/level.rs @@ -36,9 +36,6 @@ pub(crate) trait Level { fn arch_ty(&self, vec_ty: &VecType) -> TokenStream; /// The docstring for this SIMD level token. fn token_doc(&self) -> &'static str; - /// The full path to the `core_arch` token wrapped by this SIMD level token. - fn token_inner(&self) -> TokenStream; - /// Any additional imports or supporting code necessary for the module (for instance, importing /// implementation-specific functions from `core::arch`). 
fn make_module_prelude(&self) -> TokenStream; @@ -216,8 +213,6 @@ pub(crate) trait Level { fn make_module(&self) -> TokenStream { let level_tok = self.token(); let token_doc = self.token_doc(); - let field_name = Ident::new(&self.name().to_ascii_lowercase(), Span::call_site()); - let token_inner = self.token_inner(); let imports = type_imports(); let module_prelude = self.make_module_prelude(); let impl_body = self.make_impl_body(); @@ -236,7 +231,7 @@ pub(crate) trait Level { #[doc = #token_doc] #[derive(Clone, Copy, Debug)] pub struct #level_tok { - pub #field_name: #token_inner, + _private: (), } impl #level_tok { diff --git a/fearless_simd_gen/src/mk_fallback.rs b/fearless_simd_gen/src/mk_fallback.rs index b4859424..d3258df2 100644 --- a/fearless_simd_gen/src/mk_fallback.rs +++ b/fearless_simd_gen/src/mk_fallback.rs @@ -36,11 +36,7 @@ impl Level for Fallback { } fn token_doc(&self) -> &'static str { - r#"The SIMD token for the "fallback" level."# - } - - fn token_inner(&self) -> TokenStream { - quote!(crate::core_arch::fallback::Fallback) + r#"A token for scalar fallback SIMD, representing the "fallback" level."# } fn make_module_prelude(&self) -> TokenStream { @@ -128,9 +124,7 @@ impl Level for Fallback { quote! { #[inline] pub const fn new() -> Self { - Self { - fallback: crate::core_arch::fallback::Fallback::new(), - } + Self { _private: () } } } } diff --git a/fearless_simd_gen/src/mk_neon.rs b/fearless_simd_gen/src/mk_neon.rs index ad356dfa..4b2f5f75 100644 --- a/fearless_simd_gen/src/mk_neon.rs +++ b/fearless_simd_gen/src/mk_neon.rs @@ -53,11 +53,7 @@ impl Level for Neon { } fn token_doc(&self) -> &'static str { - r#"The SIMD token for the "neon" level."# - } - - fn token_inner(&self) -> TokenStream { - quote!(crate::core_arch::aarch64::Neon) + r#"A token for Neon intrinsics on aarch64, representing the "neon" level."# } fn make_module_prelude(&self) -> TokenStream { @@ -74,9 +70,7 @@ impl Level for Neon { quote! 
{ #[inline] pub const unsafe fn new_unchecked() -> Self { - Neon { - neon: unsafe { crate::core_arch::aarch64::Neon::new_unchecked() }, - } + Neon { _private: () } } } } diff --git a/fearless_simd_gen/src/mk_wasm.rs b/fearless_simd_gen/src/mk_wasm.rs index 70577776..13ab4fae 100644 --- a/fearless_simd_gen/src/mk_wasm.rs +++ b/fearless_simd_gen/src/mk_wasm.rs @@ -42,11 +42,7 @@ impl Level for WasmSimd128 { } fn token_doc(&self) -> &'static str { - r#"The SIMD token for the "wasm128" level."# - } - - fn token_inner(&self) -> TokenStream { - quote!(crate::core_arch::wasm32::WasmSimd128) + r#"A token for WASM SIMD128, representing the "wasm128" level."# } fn make_module_prelude(&self) -> TokenStream { @@ -63,7 +59,7 @@ impl Level for WasmSimd128 { quote! { #[inline] pub const fn new_unchecked() -> Self { - Self { wasmsimd128: crate::core_arch::wasm32::WasmSimd128::new() } + Self { _private: () } } } } diff --git a/fearless_simd_gen/src/mk_x86.rs b/fearless_simd_gen/src/mk_x86.rs index e0c26949..3389393f 100644 --- a/fearless_simd_gen/src/mk_x86.rs +++ b/fearless_simd_gen/src/mk_x86.rs @@ -61,15 +61,12 @@ impl Level for X86 { fn token_doc(&self) -> &'static str { match self { - Self::Sse4_2 => "The SIMD token for the x86-64-v2 level.", - Self::Avx2 => "The SIMD token for the x86-64-v3 level.", - } - } - - fn token_inner(&self) -> TokenStream { - match self { - Self::Sse4_2 => quote!(crate::core_arch::x86::Sse4_2), - Self::Avx2 => quote!(crate::core_arch::x86::Avx2), + Self::Sse4_2 => { + "A token for SSE4.2 intrinsics on `x86` and `x86_64`, representing the x86-64-v2 level." + } + Self::Avx2 => { + "A token for AVX2 intrinsics on `x86` and `x86_64`, representing the x86-64-v3 level." + } } } @@ -145,9 +142,7 @@ impl Level for X86 { /// be available. #[inline] pub const unsafe fn new_unchecked() -> Self { - Sse4_2 { - sse4_2: unsafe { crate::core_arch::x86::Sse4_2::new_unchecked() }, - } + Sse4_2 { _private: () } } }, Self::Avx2 => quote! 
{ @@ -160,9 +155,7 @@ impl Level for X86 { /// be available. #[inline] pub const unsafe fn new_unchecked() -> Self { - Self { - avx2: unsafe { crate::core_arch::x86::Avx2::new_unchecked() }, - } + Self { _private: () } } }, }